File size: 16,335 Bytes
1b95620
 
 
 
 
 
 
 
 
55194e0
1b95620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55194e0
 
 
 
 
1b95620
 
 
 
55194e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b95620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55194e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
"""
app.py – Children's Learning Router Service
Runs on Hugging Face Spaces (CPU-only Docker) with uvicorn + FastAPI (ASGI native).
Validates serv_code header, uses Qwen2.5-3B-Instruct to decide routing,
then asynchronously forwards the full payload to the appropriate downstream URL.
"""

import os
import json
import asyncio
import logging
from contextlib import asynccontextmanager

import httpx
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# ──────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────
# Root logging: INFO and above, formatted as "timestamp  LEVEL  message".
_LOG_FORMAT = "%(asctime)s  %(levelname)s  %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by the helpers and routes in this file.
logger = logging.getLogger(__name__)

# ──────────────────────────────────────────────
# Environment / Secrets
# (set in HF Spaces → Settings → Repository secrets)
# ──────────────────────────────────────────────
# Shared secret expected in the `serv_code` request header, plus the
# Cloudflare credentials used for IP blocking. Each defaults to "" when unset.
SERV_CODE     = os.getenv("SERV_CODE", "")
CF_API_TOKEN  = os.getenv("CF_API_TOKEN", "")
CF_ACCOUNT_ID = os.getenv("CF_ACCOUNT_ID", "")

# Downstream service endpoints; every one can be overridden via the environment.
BLOCK_URL      = os.getenv("BLOCK_URL",      "https://blockchakalaka.onrender.com")
CHITCHAT_URL   = os.getenv("CHITCHAT_URL",   "https://chitchatchakalaka.onrender.com")
QUESTION_URL   = os.getenv("QUESTION_URL",   "https://questionchakalaka.onrender.com")
CURRICULUM_URL = os.getenv("CURRICULUM_URL", "https://currichakalaka.onrender.com")

# Routing decision word -> downstream URL that receives the payload.
DECISION_URL_MAP = dict(
    Block=BLOCK_URL,
    Chitchat=CHITCHAT_URL,
    Question=QUESTION_URL,
    Curriculum=CURRICULUM_URL,
)

# ──────────────────────────────────────────────
# Model globals
# ──────────────────────────────────────────────
# Hugging Face model id of the routing LLM.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

# Both stay None until load_model() assigns the loaded objects.
tokenizer = model = None


def load_model() -> None:
    """Populate the module-level ``tokenizer`` and ``model`` globals.

    Loads the tokenizer and the causal-LM weights for ``MODEL_NAME`` onto the
    CPU in float32 and switches the model to eval mode. Called once from the
    FastAPI lifespan hook.
    """
    global tokenizer, model

    logger.info("Loading %s on CPU …", MODEL_NAME)

    # float32 is the CPU-safe dtype; weights are placed on the CPU device.
    weight_options = {
        "torch_dtype": torch.float32,
        "device_map": "cpu",
        "low_cpu_mem_usage": True,
    }
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **weight_options)
    model.eval()

    logger.info("Model ready.")


# ──────────────────────────────────────────────
# FastAPI lifespan — loads model before first request
# ──────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the model, then yield to the app.

    ``load_model()`` is a synchronous call that completes before the
    ``yield``, so the model globals are set before the application starts
    serving. No teardown work follows the ``yield``.
    """
    load_model()
    yield


# ASGI application object; the `lifespan` hook calls load_model() on startup.
app = FastAPI(lifespan=lifespan)

# ──────────────────────────────────────────────
# System Prompt
# ──────────────────────────────────────────────
# System message sent to the routing model on every request. It defines the
# four decision words and the strict order in which the rules are evaluated.
# The em dash and arrows below were previously stored as mojibake sequences,
# which put meaningless byte noise into the prompt the model reads.
SYSTEM_PROMPT = """You are a routing agent for a children's educational app (ages 5-12).

Read the student context and output EXACTLY ONE word:
    Block | Curriculum | Question | Chitchat

RULES — evaluate in this strict order:

1. BLOCK
   - request_message contains abusive, sexual, violent, hateful, or adult content.
   - OR the child has been persistently abusive across multiple turns in chat_history.
   → Output: Block

2. CURRICULUM
   - request_message is clearly related to current_learning goals.
   → Output: Curriculum

3. QUESTION
   - request_message is educational / knowledge-based but NOT related to current_learning.
   (Could be another subject, a past/future lesson, or general knowledge.)
   → Output: Question

4. CHITCHAT
   - Everything else: greetings, jokes, feelings, random comments, playful chat.
   → Output: Chitchat

CRITICAL:
- Output the single decision word ONLY. No punctuation, no explanation.
- Follow the numbered order strictly.
- Prefer Curriculum over Question when current_learning is involved.
- Prefer Question over Chitchat when the message is educational.
"""


def build_user_content(payload: dict) -> str:
    """Serialise the full learning context into the user prompt for the model.

    Args:
        payload: Request body. Reads ``learning_path`` (``board``, ``class``,
            ``subject``, ``student_name``, ``teacher_persona``,
            ``curriculum_objectives``, ``assessment_stages.current_learning``,
            ``chat_history``, ``scratchpad``) and ``query.request_message``.

    Returns:
        The prompt text, ending with the "Output your single decision word:"
        cue. Missing scalar fields render as ``N/A``, missing lists as ``[]``
        and a missing request message as an empty quoted string.

    ``learning_path``, ``query`` and ``assessment_stages`` are treated as
    empty when they are null or not JSON objects, so a malformed body yields a
    prompt full of defaults instead of raising ``AttributeError``.
    """

    def _as_dict(value: object) -> dict:
        # JSON null / list / scalar where an object is expected -> empty object.
        return value if isinstance(value, dict) else {}

    def _dump(value: object) -> str:
        # ensure_ascii=False keeps non-ASCII text readable in the prompt
        # instead of turning it into \uXXXX escape sequences.
        return json.dumps(value, indent=2, ensure_ascii=False)

    body             = _as_dict(payload)
    lp               = _as_dict(body.get("learning_path"))
    query            = _as_dict(body.get("query"))
    current_learning = _as_dict(lp.get("assessment_stages")).get("current_learning", [])

    return f"""=== STUDENT CONTEXT ===
Board: {lp.get('board', 'N/A')}
Class: {lp.get('class', 'N/A')}
Subject: {lp.get('subject', 'N/A')}
Student Name: {lp.get('student_name', 'N/A')}
Teacher Persona: {lp.get('teacher_persona', 'N/A')}

=== CURRICULUM OBJECTIVES ===
{_dump(lp.get('curriculum_objectives', []))}

=== CURRENT LEARNING (active topic) ===
{_dump(current_learning)}

=== CHAT HISTORY ===
{_dump(lp.get('chat_history', []))}

=== SCRATCHPAD ===
{_dump(lp.get('scratchpad', []))}

=== STUDENT'S CURRENT REQUEST ===
"{query.get('request_message', '')}"

Output your single decision word:"""


def get_decision(payload: dict) -> str:
    """Ask the loaded model for a routing word.

    Builds a system + user chat from ``SYSTEM_PROMPT`` and
    ``build_user_content(payload)``, greedily generates at most five new
    tokens, and maps the lower-cased completion onto a decision word by
    substring match.

    Returns:
        ``"Block"``, ``"Curriculum"`` or ``"Question"`` (checked in that
        order), or ``"Chitchat"`` when none of those words appears.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": build_user_content(payload)},
    ]

    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    encoded    = tokenizer([prompt], return_tensors="pt")
    prompt_len = encoded["input_ids"].shape[1]

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=5,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + completion; keep only the completion tokens.
    completion = generated[0][prompt_len:]
    raw = tokenizer.decode(completion, skip_special_tokens=True).strip().lower()
    logger.info("Raw model output: %r", raw)

    # First matching word wins; anything unrecognised falls through to Chitchat.
    for label in ("Block", "Curriculum", "Question"):
        if label.lower() in raw:
            return label
    return "Chitchat"


# ──────────────────────────────────────────────
# Cloudflare IP blocking helper
# ──────────────────────────────────────────────
async def block_ip_cloudflare(ip: str) -> None:
    """Best-effort: create a Cloudflare account-level access rule blocking ``ip``.

    Does nothing (beyond a warning log) when the Cloudflare secrets are not
    configured or when ``ip`` is not a valid IPv4/IPv6 address, e.g. the
    ``"unknown"`` placeholder. Never raises: any failure talking to Cloudflare
    is logged and swallowed so the caller can still answer the request.

    Args:
        ip: Address to block, as a string.
    """
    import ipaddress  # local import: only this helper needs it

    if not CF_API_TOKEN or not CF_ACCOUNT_ID:
        logger.warning("Cloudflare secrets not configured – skipping IP block for %s", ip)
        return

    try:
        address = ipaddress.ip_address(ip)
    except ValueError:
        logger.warning("Not a valid IP address – skipping Cloudflare block for %r", ip)
        return

    # Cloudflare access rules use target "ip" for IPv4 and "ip6" for IPv6.
    target = "ip6" if address.version == 6 else "ip"

    url = f"https://api.cloudflare.com/client/v4/accounts/{CF_ACCOUNT_ID}/firewall/access_rules/rules"
    cf_headers = {
        "Authorization": f"Bearer {CF_API_TOKEN}",
        "Content-Type":  "application/json",
    }
    body = {
        "mode": "block",
        "configuration": {"target": target, "value": str(address)},
        "notes": "Auto-blocked: invalid serv_code",
    }
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(url, headers=cf_headers, json=body)
        if resp.status_code >= 400:
            # Surface rejected rules instead of logging them as routine INFO.
            logger.warning(
                "Cloudflare block %s → HTTP %s: %s", ip, resp.status_code, resp.text
            )
        else:
            logger.info("Cloudflare block %s → HTTP %s", ip, resp.status_code)
    except Exception as exc:
        # Deliberately broad: blocking is best-effort and must not fail the request.
        logger.error("Cloudflare block failed for %s: %s", ip, exc)


# ──────────────────────────────────────────────
# Downstream forwarding helper
# ──────────────────────────────────────────────
async def forward_request(
    target_url: str,
    payload: dict,
    serv_code: str,
    *,
    max_attempts: int = 3,
) -> tuple[dict, int]:
    """
    POST the full payload to the chosen downstream service.

    Retries on ``httpx.TimeoutException`` or ``httpx.ConnectError`` with
    exponential backoff (2 s, 4 s, ...). Any other exception aborts
    immediately with a 502. Uses split timeouts to handle Render cold-starts
    gracefully.

    Args:
        target_url: Downstream endpoint to POST to.
        payload: JSON body, forwarded unchanged.
        serv_code: Value sent in the ``serv_code`` header.
        max_attempts: Total number of attempts; values below 1 are treated as 1.

    Returns:
        ``(body, status_code)``. On a completed HTTP exchange ``body`` is the
        parsed JSON response (or ``{"raw_response": <text>}`` when the body is
        not JSON) and ``status_code`` is the downstream status, whatever it
        is. No ``raise_for_status()`` is applied, so 4xx/5xx responses are
        passed through rather than raised. When every attempt fails, or an
        unexpected error occurs, ``body`` describes the error and
        ``status_code`` is 502.

    NOTE(review): a read timeout is retried even though the downstream may
    already have processed the POST. Confirm the downstream services tolerate
    duplicate deliveries.
    """
    max_attempts = max(1, max_attempts)

    fwd_headers = {
        "Content-Type": "application/json",
        "serv_code":    serv_code,
    }

    # Split timeouts: generous connect window for Render cold-starts,
    # generous read window for slow inference on downstream services.
    timeout = httpx.Timeout(
        connect=60.0,   # Render free-tier cold-start can take 30–50 s
        read=120.0,     # downstream inference may be slow
        write=30.0,
        pool=10.0,
    )

    last_exc: Exception | None = None

    for attempt in range(1, max_attempts + 1):
        try:
            logger.info(
                "Forward attempt %d/%d → %s", attempt, max_attempts, target_url
            )
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.post(target_url, json=payload, headers=fwd_headers)
                logger.info(
                    "Downstream %s → HTTP %s (attempt %d)", target_url, resp.status_code, attempt
                )
                try:
                    return resp.json(), resp.status_code
                except ValueError as parse_exc:
                    # JSON and Unicode decode errors are both ValueError subclasses.
                    logger.warning(
                        "Could not parse JSON from %s (HTTP %s): %s — returning raw text",
                        target_url, resp.status_code, parse_exc,
                    )
                    return {"raw_response": resp.text}, resp.status_code

        except (httpx.TimeoutException, httpx.ConnectError) as exc:
            # Retryable. Per the original author, connect errors are the
            # primary cause of 502s when the Render service is sleeping.
            last_exc = exc
            label = "TIMEOUT" if isinstance(exc, httpx.TimeoutException) else "CONNECT ERROR"
            logger.warning(
                "Attempt %d/%d %s for %s | type=%s | detail=%s",
                attempt, max_attempts, label, target_url, type(exc).__name__, exc,
            )

        except Exception as exc:
            # Unexpected / non-retryable error — log full traceback and bail.
            logger.exception(
                "Attempt %d/%d UNEXPECTED ERROR for %s | type=%s | detail=%s",
                attempt, max_attempts, target_url, type(exc).__name__, exc,
            )
            return {"error": f"Unexpected forwarding error: {exc}"}, 502

        # Exponential backoff before the next attempt (2 s, 4 s, ...).
        if attempt < max_attempts:
            backoff = 2 ** attempt
            logger.info("Backing off %ds before retry …", backoff)
            await asyncio.sleep(backoff)

    # Every attempt ended in a retryable error.
    logger.error(
        "All %d forward attempts failed for %s | last_error_type=%s | last_error=%s",
        max_attempts, target_url, type(last_exc).__name__, last_exc,
    )
    return {
        "error": f"Downstream unreachable after {max_attempts} attempts: {target_url}",
        "last_error_type": type(last_exc).__name__,
        "last_error": str(last_exc),
    }, 502


# ──────────────────────────────────────────────
# Shared helpers
# ──────────────────────────────────────────────
def get_caller_ip(req: Request) -> str:
    """Return the caller's address as a string.

    Uses the first non-empty entry of the ``X-Forwarded-For`` header when one
    exists, otherwise ``req.client.host``, otherwise the literal ``"unknown"``.
    Blank entries (for example a header of ``" , 10.0.0.1"``) are skipped so
    an empty string is never returned in place of an address.

    NOTE(review): ``X-Forwarded-For`` can be set by the client unless a
    trusted proxy overwrites it. Confirm the deployment's proxy behaviour
    before acting on this value for IP blocking.
    """
    forwarded = req.headers.get("x-forwarded-for", "")
    for hop in forwarded.split(","):
        candidate = hop.strip()
        if candidate:
            return candidate
    return req.client.host if req.client else "unknown"


# ──────────────────────────────────────────────
# Routes
# ──────────────────────────────────────────────
@app.get("/health")
async def health():
    """Public liveness probe; no auth header is checked."""
    liveness = {"status": "ok"}
    return liveness


@app.get("/ping")
async def ping(request: Request):
    """
    Authenticated liveness probe.

    Validates the ``serv_code`` header against ``SERV_CODE`` using a
    constant-time comparison. On a missing or wrong code the caller's IP is
    passed to ``block_ip_cloudflare`` and a 401 is returned. Otherwise the
    response reports the service name and model id.
    """
    import hmac  # local import: only needed for the constant-time comparison

    incoming_code = request.headers.get("serv_code", "")

    # Compare as bytes: hmac.compare_digest rejects non-ASCII str arguments.
    # An empty incoming code or an unset SERV_CODE is always rejected.
    authorised = (
        bool(incoming_code)
        and bool(SERV_CODE)
        and hmac.compare_digest(incoming_code.encode("utf-8"), SERV_CODE.encode("utf-8"))
    )

    if not authorised:
        caller_ip = get_caller_ip(request)
        logger.warning("Ping rejected – invalid serv_code from IP %s", caller_ip)
        await block_ip_cloudflare(caller_ip)
        return JSONResponse(status_code=401, content={"error": "Unauthorized"})

    return JSONResponse(content={
        "status":  "alive",
        "service": "children-learning-router",
        "model":   MODEL_NAME,
    })


@app.post("/chat")
async def chat(request: Request):
    """
    Main routing endpoint.
    1. Validate serv_code header (constant-time comparison).
    2. Parse and validate JSON body.
    3. Get routing decision from Qwen.
    4. Async-forward payload to chosen downstream service.
    5. Return downstream response to caller.

    Returns a JSONResponse:
      - 401 when the serv_code header is missing or wrong (the caller's IP is
        also passed to ``block_ip_cloudflare``);
      - 400 when the body is not a JSON object or a required field is missing
        or has the wrong JSON type;
      - 500 when model inference raises;
      - otherwise the status code reported by ``forward_request``, with the
        decision, the target URL and the downstream body.
    """
    import hmac  # local import: only needed for the constant-time comparison

    def bad_request(message: str) -> JSONResponse:
        return JSONResponse(status_code=400, content={"error": message})

    # ── 1. Auth ──────────────────────────────
    incoming_code = request.headers.get("serv_code", "")
    # Compare as bytes: hmac.compare_digest rejects non-ASCII str arguments.
    # An empty incoming code or an unset SERV_CODE is always rejected.
    authorised = (
        bool(incoming_code)
        and bool(SERV_CODE)
        and hmac.compare_digest(incoming_code.encode("utf-8"), SERV_CODE.encode("utf-8"))
    )
    if not authorised:
        caller_ip = get_caller_ip(request)
        logger.warning("Invalid serv_code from IP %s", caller_ip)
        await block_ip_cloudflare(caller_ip)
        return JSONResponse(status_code=401, content={"error": "Unauthorized"})

    # ── 2. Parse body ────────────────────────
    try:
        payload = await request.json()
    except Exception:
        return bad_request("Request body must be valid JSON")

    # Valid JSON is not necessarily an object (could be a list, string, number,
    # null); reject those here instead of failing later with a 500.
    if not isinstance(payload, dict):
        return bad_request("Request body must be a JSON object")

    if "learning_path" not in payload:
        return bad_request("Missing required field: learning_path")
    if "query" not in payload:
        return bad_request("Missing required field: query")

    learning_path = payload["learning_path"]
    query = payload["query"]
    if not isinstance(learning_path, dict):
        return bad_request("Field learning_path must be a JSON object")
    if not isinstance(query, dict):
        return bad_request("Field query must be a JSON object")

    if "request_message" not in query:
        return bad_request("Missing required field: query.request_message")

    for field in ("board", "class", "subject", "student_name", "teacher_persona"):
        if field not in learning_path:
            return bad_request(f"Missing required field: learning_path.{field}")

    # ── 3. Decision ──────────────────────────
    # NOTE(review): get_decision is a synchronous call made directly from this
    # coroutine, so the event loop is occupied for the duration of inference.
    try:
        decision = get_decision(payload)
    except Exception as exc:
        logger.exception("Model inference error: %s", exc)
        return JSONResponse(status_code=500, content={"error": "Model inference failed"})

    logger.info("Routing decision: %s", decision)
    target_url = DECISION_URL_MAP.get(decision, CHITCHAT_URL)

    # ── 4. Forward ───────────────────────────
    response_body, status_code = await forward_request(target_url, payload, incoming_code)

    # ── 5. Return ────────────────────────────
    return JSONResponse(status_code=status_code, content={
        "decision":  decision,
        "forwarded": target_url,
        "response":  response_body,
    })