# pocket-tutor/core/analyzer.py
# Codex
# Add smoke tests and tighten model contract
# cb14994
from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass
from typing import Any
try:
    import spaces
except ImportError:
    # The `spaces` package could not be imported (e.g. local development),
    # so bind a stand-in whose GPU decorator leaves functions untouched.
    class _LocalSpacesFallback:
        """Stand-in for the `spaces` module exposing a no-op ``GPU`` factory."""

        @staticmethod
        def GPU(
            duration: int = 30,
        ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
            """Return a decorator that hands back the callable unchanged.

            ``duration`` is accepted so call sites look the same as with the
            real package; this fallback never reads it.
            """

            def _passthrough(target: Callable[..., Any]) -> Callable[..., Any]:
                return target

            return _passthrough

    spaces = _LocalSpacesFallback()
from core.inference import run_tutor_inference, transcribe_audio
from core.parser import parse_sections, stringify_content, validate_image_path
from env.config import MODEL_ID, PARAMETER_COUNT, QUESTION_LIMIT, TRANSCRIPT_LIMIT
# System instructions that keep the app educational rather than answer-dumping.
# Each entry is one sentence; they are joined with single spaces.
TUTOR_SYSTEM_PROMPT = " ".join(
    (
        "You are Pocket Tutor, a patient multimodal homework coach.",
        "Help the learner understand the problem without shaming them or simply dumping final answers.",
        "For math and science, show compact steps and check the answer.",
        "For writing or reading, explain the reasoning and offer a revision or clue.",
        "If the uploaded image is unclear, say what information is missing.",
        "Do not solve active graded tests, exams, or requests to cheat; offer study guidance instead.",
    )
)
@dataclass(frozen=True)
class TutorReport:
    """Immutable record of one tutoring run.

    Holds the learner context, the model/execution diagnostics, and the
    parsed sections of the tutor's reply. Field order is significant because
    the report is constructed positionally.
    """

    # Summary of what the learner supplied (grade band, help mode, inputs).
    student_context: str
    # Model identity followed by image, transcription, and inference log lines.
    model_path: str

    # Parsed reply sections — presumably in the order the prompt requests
    # them (PROBLEM READ through PARENT NOTE); confirm against the parser.
    problem: str
    knowns: str
    strategy: str
    steps: str
    check: str
    hint: str
    parent: str
def build_tutor_prompt(
    question: str,
    transcript: str,
    grade_band: str,
    help_mode: str,
    image_status: str,
) -> str:
    """Assemble the multimodal tutoring prompt expected by the fine-tuned adapter.

    The typed question and the voice transcript are stripped, the non-empty
    ones are joined with a newline, and the result is capped at
    ``QUESTION_LIMIT`` characters. If nothing remains, a placeholder sentence
    stands in for the learner question.
    """
    # Skip blank inputs so an empty question or transcript adds no stray newline.
    learner_parts = [text for text in (question.strip(), transcript.strip()) if text]
    combined_question = "\n".join(learner_parts).strip()
    # NOTE(review): the cap covers question and transcript together, so a long
    # typed question can crowd the transcript out — confirm this is intended.
    learner_text = (
        combined_question[:QUESTION_LIMIT]
        or "The learner uploaded an image and did not type a question."
    )
    return f"""{TUTOR_SYSTEM_PROMPT}
Student grade band: {grade_band}
Help mode: {help_mode}
Image status: {image_status}
Return exactly these sections:
Important:
- Output all seven sections every time, in the exact order below.
- Keep each section concise, but do not omit any section.
- Do not stop after WORKED STEPS. You must also fill CHECK, NEXT HINT, and PARENT NOTE.
- The final section must always be PARENT NOTE and must be a complete single sentence.
- If the answer is partial, say so inside WORKED STEPS and continue to the later sections.
=== PROBLEM READ ===
[Briefly restate what the learner is asking.]
=== KNOWNS ===
- [List useful givens, facts, or constraints.]
=== STRATEGY ===
[Explain the plan in 2-4 sentences.]
=== WORKED STEPS ===
[Give concise step-by-step help. For hint mode, stop before the final answer.]
=== CHECK ===
[Check units, logic, or answer quality.]
=== NEXT HINT ===
[Ask one short follow-up question or give the smallest next hint.]
=== PARENT NOTE ===
[One complete sentence a parent/tutor can use to support the learner.]
Learner question:
{learner_text}
"""
def analyze_homework(
    image_file: object | None,
    question: Any,
    audio_path: object | None,
    grade_band: str,
    help_mode: str,
) -> TutorReport:
    """Run one tutoring pass and package the outcome as a ``TutorReport``.

    Steps, in order: stringify and cap the typed question, transcribe and cap
    the audio, validate the image, build the prompt, run inference, and parse
    the response into sections.
    """
    # Normalize the three learner inputs before any prompt is built.
    typed_question = stringify_content(question)[:QUESTION_LIMIT]
    raw_transcript, transcript_log = transcribe_audio(audio_path)
    transcript = raw_transcript[:TRANSCRIPT_LIMIT]
    image_path, image_status = validate_image_path(image_file)

    # Build a traceable prompt that matches the Modal training format.
    prompt = build_tutor_prompt(
        question=typed_question,
        transcript=transcript,
        grade_band=grade_band,
        help_mode=help_mode,
        image_status=image_status,
    )
    response, inference_log = run_tutor_inference(prompt, image_path)
    sections = parse_sections(response)

    # Diagnostics always include every log line, even empty ones, so a
    # model-unavailable path is never hidden.
    diagnostics_lines = [
        f"Primary model: {MODEL_ID}",
        f"Parameters: {PARAMETER_COUNT}",
        "Execution flow: local Space runtime; no external answer API",
        "---",
        image_status,
        transcript_log,
        inference_log,
    ]
    model_path = "\n".join(diagnostics_lines)

    # The learner-facing context only lists inputs that were actually given.
    context_lines = [f"Grade band: {grade_band}", f"Help mode: {help_mode}"]
    if typed_question:
        context_lines.append(f"Typed question: {typed_question}")
    if transcript:
        context_lines.append(f"Voice transcript: {transcript}")
    if image_status:
        context_lines.append(image_status)
    student_context = "\n".join(context_lines)

    # The parsed sections fill the remaining report fields positionally.
    return TutorReport(student_context, model_path, *sections)
@spaces.GPU(duration=45)
def analyze_homework_ui(
    image_file: object | None,
    question: Any,
    audio_path: object | None,
    grade_band: str,
    help_mode: str,
) -> tuple[str, str, str, str, str, str, str, str, str]:
    """Gradio-compatible GPU entry point: run the analysis and flatten the report.

    Returns the nine report fields as a tuple in a fixed order: context,
    diagnostics, then the seven tutoring sections.
    """
    result = analyze_homework(image_file, question, audio_path, grade_band, help_mode)
    # Keep the order stable; output components are bound to it positionally.
    run_info = (result.student_context, result.model_path)
    tutoring_sections = (
        result.problem,
        result.knowns,
        result.strategy,
        result.steps,
        result.check,
        result.hint,
        result.parent,
    )
    return run_info + tutoring_sections
def reset_outputs() -> tuple[str, str, str, str, str, str, str, str, str]:
    """Return placeholder outputs shown before a fresh tutoring run.

    The first two slots carry progress messages; the remaining seven are
    blanked.
    """
    # Give immediate click feedback while the model loads.
    status_messages = (
        "Preparing tutoring context...",
        "Starting local model execution...",
    )
    blank_sections = ("",) * 7
    return status_messages + blank_sections