from fastapi import FastAPI, HTTPException, UploadFile, Form, File
from pydantic import BaseModel, Field, field_validator
from typing import List, Dict, Union, Optional, Any
from enum import Enum
import os
from google import genai
from google.genai import types
import json
from dotenv import load_dotenv
import requests
from faster_whisper import WhisperModel
import uuid
from pathlib import Path
import aiofiles
import tempfile
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("Missing GROQ_API_KEY in environment variables")
app = FastAPI(
    title="Educational Question API",
    description="API for question evaluation and grading",
    version="3.0.0"
)
# Enums
class QuestionType(str, Enum):
    sentence = "sentence"
    short = "short"
    fill_blank = "fill_blank"
    select = "select"
    multi_select = "multi_select"
    true_false = "true_false"
    matching = "matching"
    ordering = "ordering"
    image_labeling = "image_labeling"
    code_output = "code_output"
    code_debug = "code_debug"
    audio_transcribe = "audio_transcribe"
    video_question = "video_question"
    drag_drop = "drag_drop"
    math_expression = "math_expression"
    diagram = "diagram"
class Attachment(BaseModel):
    type: str
    url: Optional[str] = None
    content: Optional[str] = None
class Metadata(BaseModel):
    min_words: Optional[int] = None
    max_words: Optional[int] = None
    language: Optional[str] = "en"
    difficulty: Optional[str] = "medium"
    total_marks: Optional[int] = 100
class QuestionRequest(BaseModel):
    question_type: QuestionType
    question: str
    correct_answer: Union[str, List[str], Dict[str, Any], bool] = Field(
        ...,
        description="The correct/reference answer for this question"
    )
    options: Optional[Union[List[Any], Dict[str, Any]]] = []
    metadata: Optional[Metadata] = None
    attachments: Optional[List[Attachment]] = []
    context: Optional[str] = Field(
        None,
        description="Additional context to help AI understand domain-specific knowledge"
    )

    @field_validator('correct_answer')
    @classmethod
    def validate_answer(cls, v, info):
        q_type = info.data.get('question_type')
        if q_type is None:
            return v
        if q_type == QuestionType.true_false and not isinstance(v, bool):
            raise ValueError("Answer must be boolean for true_false type")
        if q_type in [QuestionType.multi_select, QuestionType.ordering] and not isinstance(v, list):
            raise ValueError(f"Answer must be a list for {q_type} type")
        if q_type in [QuestionType.matching, QuestionType.image_labeling] and not isinstance(v, dict):
            raise ValueError(f"Answer must be a dictionary for {q_type} type")
        return v
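
# A minimal sketch of the request model in use (hypothetical values, kept as a
# commented example so importing this module stays side-effect free):
#
#   q = QuestionRequest(
#       question_type=QuestionType.true_false,
#       question="The Earth orbits the Sun.",
#       correct_answer=True,
#       metadata=Metadata(total_marks=10),
#   )
#   # The validator rejects type mismatches, e.g. a string answer for a
#   # true_false question raises a pydantic ValidationError.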
class AIValidation(BaseModel):
    is_valid: bool
    suggestions: Optional[str] = None

class AIGrading(BaseModel):
    ai_score: float
    total_marks: int
    feedback: Optional[str] = None
    is_correct: bool
# Define binary vs subjective question types
BINARY_TYPES = [
    QuestionType.short,
    QuestionType.fill_blank,
    QuestionType.select,
    QuestionType.true_false,
    QuestionType.matching,
    QuestionType.ordering,
    QuestionType.code_output,
    QuestionType.image_labeling,
    QuestionType.multi_select
]
SUBJECTIVE_TYPES = [
    QuestionType.sentence,
    QuestionType.code_debug,
    QuestionType.math_expression
]
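# Note: question types missing from both lists (e.g. audio_transcribe,
# video_question, drag_drop, diagram) fall through to the subjective grading
# branch in grade_answer_with_ai below.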
def validate_question_with_ai(question_data: dict) -> dict:
    """
    Validates if a question is well-formed.
    Returns: {"is_valid": bool, "suggestions": str or null}
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return {
            "is_valid": True,
            "suggestions": "AI validation skipped - GEMINI_API_KEY not configured"
        }
    question_type = question_data.get("question_type")
    context = question_data.get("context", "")
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-flash-lite-latest"
        question_text = question_data.get("question", "")
        correct_answer = question_data.get("correct_answer", "")
        options = question_data.get("options") or []
        attachments = question_data.get("attachments") or []
        context_section = ""
        if context and context.strip():
            context_section = f"\n## CONTEXT:\n{context.strip()}\n"
        system_instruction = """You are an educational content validator.
Validate if the question is:
- Clear and unambiguous
- Grammatically correct
- Has proper formatting
- Includes correct answer in options (if applicable)
- Factually accurate
Return ONLY valid JSON:
{
    "is_valid": <true/false>,
    "suggestions": "<text or null>"
}
"""
        prompt_text = f"""Validate this educational question:
**Type:** {question_type}
**Question:** {question_text}
**Correct Answer:** {json.dumps(correct_answer, ensure_ascii=False)}
**Options:** {json.dumps(options, ensure_ascii=False)}
**Attachments:** {json.dumps(attachments, ensure_ascii=False)}
{context_section}
Return validation result in JSON format."""
        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt_text)])]
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
            system_instruction=[types.Part.from_text(text=system_instruction)]
        )
        response_parts = []
        for chunk in client.models.generate_content_stream(model=model, contents=contents, config=config):
            if text := getattr(chunk, "text", None):
                response_parts.append(text)
        response_text = "".join(response_parts).strip()
        if response_text:
            parsed = json.loads(response_text)
            return {
                "is_valid": parsed.get("is_valid", True),
                "suggestions": parsed.get("suggestions")
            }
        return {"is_valid": True, "suggestions": None}
    except Exception as e:
        return {"is_valid": True, "suggestions": f"Validation error: {str(e)}"}
def grade_answer_with_ai(question_data: dict, student_answer: Any) -> dict:
    """
    Grades a student's answer against the correct answer.
    Returns: {
        "ai_score": float,
        "total_marks": int,
        "feedback": str or null,
        "is_correct": bool
    }
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    metadata = question_data.get("metadata") or {}
    total_marks = metadata.get("total_marks", 100) if isinstance(metadata, dict) else 100
    if not api_key:
        return {
            "ai_score": 0,
            "total_marks": total_marks,
            "feedback": "AI grading unavailable - GEMINI_API_KEY not configured",
            "is_correct": False
        }
    question_type = question_data.get("question_type")
    context = question_data.get("context", "")
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-flash-lite-latest"
        question_text = question_data.get("question", "")
        correct_answer = question_data.get("correct_answer", "")
        attachments = question_data.get("attachments") or []
        context_section = ""
        if context and context.strip():
            context_section = f"\n## CONTEXT:\n{context.strip()}\n"
        # Different grading logic for binary vs subjective
        if question_type in BINARY_TYPES:
            system_instruction = """You are an educational answer grader for EXACT-MATCH questions.
For binary question types (select, true_false, fill_blank, short, matching, ordering, code_output, image_labeling, multi_select):
- Compare student answer with correct answer
- Award full marks if correct, 0 if incorrect
- Be strict but account for minor formatting differences
- For text answers, ignore case and extra whitespace
- For lists/arrays, order matters unless it's multi_select
Return ONLY valid JSON:
{
    "ai_score": <0 or total_marks>,
    "total_marks": <number>,
    "feedback": "<optional text>",
    "is_correct": <true/false>
}
"""
            prompt_text = f"""Grade this student answer:
**Question Type:** {question_type}
**Question:** {question_text}
**Correct Answer:** {json.dumps(correct_answer, ensure_ascii=False)}
**Student Answer:** {json.dumps(student_answer, ensure_ascii=False)}
**Total Marks:** {total_marks}
{context_section}
Compare and grade the student answer."""
        else:  # SUBJECTIVE_TYPES
            system_instruction = """You are an educational answer grader for SUBJECTIVE questions.
For subjective question types (sentence, code_debug, math_expression):
- Compare student answer with correct/reference answer
- Award partial credit based on quality (0 to total_marks)
- Consider: accuracy, completeness, clarity, depth
- Check word count requirements if specified
- Provide constructive feedback
Return ONLY valid JSON:
{
    "ai_score": <number>,
    "total_marks": <number>,
    "feedback": "<text>",
    "is_correct": <true if score >= 70% of total_marks, else false>
}
Scoring guidelines:
- 90-100%: Excellent, comprehensive answer
- 70-89%: Good answer with minor gaps
- 50-69%: Acceptable but incomplete
- Below 50%: Significant issues or incorrect
"""
            prompt_text = f"""Grade this student answer:
**Question Type:** {question_type}
**Question:** {question_text}
**Reference Answer:** {json.dumps(correct_answer, ensure_ascii=False)}
**Student Answer:** {json.dumps(student_answer, ensure_ascii=False)}
**Metadata:** {json.dumps(metadata, ensure_ascii=False)}
**Total Marks:** {total_marks}
{context_section}
Evaluate the student answer quality and provide detailed feedback."""
        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt_text)])]
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
            system_instruction=[types.Part.from_text(text=system_instruction)]
        )
        response_parts = []
        for chunk in client.models.generate_content_stream(model=model, contents=contents, config=config):
            if text := getattr(chunk, "text", None):
                response_parts.append(text)
        response_text = "".join(response_parts).strip()
        if not response_text:
            return {
                "ai_score": 0,
                "total_marks": total_marks,
                "feedback": "Empty AI response",
                "is_correct": False
            }
        try:
            parsed = json.loads(response_text)
            ai_score = float(parsed.get("ai_score", 0))
            ai_score = max(0, min(ai_score, total_marks))  # clamp into [0, total_marks]
            feedback = parsed.get("feedback")
            if feedback in [None, "null", "", "None"]:
                feedback = None
            is_correct = parsed.get("is_correct", ai_score >= (total_marks * 0.7))
            return {
                "ai_score": ai_score,
                "total_marks": total_marks,
                "feedback": feedback,
                "is_correct": bool(is_correct)
            }
        except (json.JSONDecodeError, ValueError) as e:
            return {
                "ai_score": 0,
                "total_marks": total_marks,
                "feedback": f"Failed to parse AI response: {str(e)}",
                "is_correct": False
            }
    except Exception as e:
        return {
            "ai_score": 0,
            "total_marks": total_marks,
            "feedback": f"Grading error: {str(e)}",
            "is_correct": False
        }
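
# Sketch of a direct grading call (hypothetical data). Binary types are scored
# all-or-nothing; subjective types can earn partial credit. The returned dict
# always matches the AIGrading shape:
#
#   result = grade_answer_with_ai(
#       {
#           "question_type": "short",
#           "question": "What does CPU stand for?",
#           "correct_answer": "Central Processing Unit",
#           "metadata": {"total_marks": 10},
#       },
#       "central processing unit",
#   )
#   # e.g. {"ai_score": 10.0, "total_marks": 10, "feedback": None, "is_correct": True}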
def transcribe_audio_groq(audio_url: str, model: str = "whisper-large-v3-turbo", response_format: str = "verbose_json", timeout: int = 60) -> dict:
    """
    Transcribe audio located at `audio_url` using the GROQ API.
    Returns a dict: {"text": <string>, "language": <string>, "raw": <dict>}
    Raises HTTPException on failure.
    """
    if not audio_url:
        raise HTTPException(status_code=400, detail="audio_url is required for transcription")
    groq_url = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    files = {
        "model": (None, model),
        "url": (None, audio_url),
        "temperature": (None, "0"),
        "response_format": (None, response_format),
    }
    try:
        resp = requests.post(groq_url, headers=headers, files=files, timeout=timeout)
    except requests.RequestException as e:
        raise HTTPException(status_code=502, detail=f"GROQ transcription request failed: {str(e)}")
    if resp.status_code != 200:
        raise HTTPException(status_code=502, detail=f"GROQ transcription failed ({resp.status_code}): {resp.text}")
    try:
        data = resp.json()
    except ValueError:
        raise HTTPException(status_code=502, detail="GROQ transcription returned non-JSON response")
    # Extract text from response
    text = None
    if isinstance(data, dict):
        text = data.get("text") or data.get("transcription") or data.get("transcribed_text")
        if not text and data.get("segments") and isinstance(data["segments"], list):
            text = " ".join([seg.get("text", "").strip() for seg in data["segments"] if seg.get("text")])
        if not text:
            # Last resort: pick the first string field that looks like a sentence
            for k, v in data.items():
                if isinstance(v, str) and len(v) > 0:
                    if len(v.split()) > 2:
                        text = v
                        break
    if not text:
        raise HTTPException(status_code=502, detail="Unable to extract transcription text from GROQ response")
    language = None
    if isinstance(data, dict):
        language = data.get("language") or data.get("detected_language")
    return {"text": text, "language": language, "raw": data}
# Local transcription setup
UPLOAD_DIR = Path(tempfile.gettempdir()) / "audio_uploads"
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
_model_cache = {}
def get_model(model_size="small"):
"""Get or create cached Whisper model"""
device = "cuda" if os.environ.get("SPACE_TYPE") == "PRO" else "cpu"
compute_type = "int8_float16" if device == "cuda" else "int8"
key = (model_size, device, compute_type)
if key not in _model_cache:
_model_cache[key] = WhisperModel(
model_size,
device=device,
compute_type=compute_type,
download_root=str(Path(os.environ.get("HF_HOME", "/tmp/huggingface")) / "hub")
)
return _model_cache[key]
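
# The cache key includes device and compute type, so the same model size
# requested twice in one process returns the same cached instance:
#
#   m1 = get_model("small")
#   m2 = get_model("small")
#   assert m1 is m2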
async def transcribe_audio_local(file: UploadFile, model_size: str = "small") -> dict:
    """Transcribe audio file locally using faster_whisper"""
    if not file.content_type or not file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Only audio files allowed")
    temp_file = UPLOAD_DIR / f"{uuid.uuid4().hex}_{file.filename}"
    async with aiofiles.open(temp_file, 'wb') as f:
        while chunk := await file.read(8192):
            await f.write(chunk)
    try:
        model = get_model(model_size)
        segments, info = model.transcribe(str(temp_file), beam_size=5)
        # model.transcribe returns a generator; materialize it once so the
        # segments can be reused for both the joined text and the raw payload.
        segments = list(segments)
        full_text = "\n".join(seg.text.strip() for seg in segments)
        return {
            "text": full_text,
            "language": info.language,
            "raw": {
                "segments": [s._asdict() for s in segments],
                "language": info.language
            }
        }
    finally:
        temp_file.unlink(missing_ok=True)
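
# Sketch of calling the local transcriber from another async context
# (hypothetical upload object):
#
#   result = await transcribe_audio_local(upload, model_size="base")
#   result["text"]  # newline-joined segment texts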
# Updated evaluate endpoint: supports audio_transcribe question type with audio_url / audio_answer_path
@app.post("/api/qa/evaluate")
async def evaluate_question_and_answer(payload: dict):
"""
Unified endpoint — accepts a question + student answer JSON and returns
validation + grading + structured output. Supports all question types,
including audio_transcribe (accepts audio_url or audio_answer_path and will call GROQ to transcribe).
"""
try:
question_data = payload.get("question")
student_response = payload.get("student_answer")
if not question_data or not student_response:
raise HTTPException(status_code=400, detail="Missing 'question' or 'student_answer' field")
# Backwards compatibility: allow "answer" key in question input
if "answer" in question_data:
question_data["correct_answer"] = question_data["answer"]
# Validate structure via Pydantic
try:
question_obj = QuestionRequest(**question_data)
except Exception as e:
raise HTTPException(status_code=422, detail=f"Invalid question format: {str(e)}")
# Decide how to obtain the student's textual answer
student_answer_text = None
transcription_info = None
# If question type is audio_transcribe, accept audio_url or audio_answer_path and transcribe if needed
if question_obj.question_type == QuestionType.audio_transcribe:
# Prefer explicit provided 'answer' text if present
provided_text = None
if isinstance(student_response, dict):
provided_text = student_response.get("answer") or student_response.get("transcribed_text")
if provided_text and isinstance(provided_text, str) and provided_text.strip():
student_answer_text = provided_text.strip()
transcription_info = {"source": "provided_text", "audio_url": student_response.get("audio_url") or student_response.get("audio_answer_path")}
else:
# Look for audio URL fields
audio_url = None
if isinstance(student_response, dict):
audio_url = student_response.get("audio_url") or student_response.get("audio_answer_path") or student_response.get("audio_path")
if not audio_url:
raise HTTPException(status_code=400, detail="audio_transcribe question requires 'audio_url' or 'audio_answer_path' in student_answer when no text 'answer' provided")
# Transcribe using GROQ
transcription = transcribe_audio_groq(audio_url)
student_answer_text = transcription["text"]
transcription_info = {
"source": "groq_transcription",
"audio_url": audio_url,
"language": transcription.get("language"),
"raw_response": transcription.get("raw")
}
else:
# Non-audio types: accept 'answer' field directly. If student_response is just a primitive, try that.
if isinstance(student_response, dict):
# standard field name 'answer' or fallback to top-level fields
if "answer" in student_response:
student_answer_text = student_response.get("answer")
elif "student_answer" in student_response:
student_answer_text = student_response.get("student_answer")
else:
# if the student_response itself is the answer object (for multi-part answers), pass it through
student_answer_text = student_response.get("answer", student_response)
else:
student_answer_text = student_response
# Normalize a single primitive answer out of containers when possible
# For grading, we pass raw value (string/list/dict/bool) as expected by grade_answer_with_ai
student_answer_for_grading = student_answer_text
# Step 1 — AI validation (use the question object as provided)
ai_validation_raw = validate_question_with_ai(question_obj.model_dump())
ai_validation = AIValidation(**ai_validation_raw)
# Step 2 — AI grading (use question dict and extracted student answer)
ai_grading_raw = grade_answer_with_ai(question_obj.model_dump(), student_answer_for_grading)
ai_grading = AIGrading(**ai_grading_raw)
# Step 3 — Structured response
response = {
"status": "success",
"evaluation": {
"validation": {
"is_valid": ai_validation.is_valid,
"issues": [] if ai_validation.is_valid else ["Detected issues in question content"],
"suggestions": [] if not ai_validation.suggestions else [ai_validation.suggestions]
},
"grading": {
"ai_score": ai_grading.ai_score,
"total_marks": ai_grading.total_marks,
"is_correct": ai_grading.is_correct,
"feedback": ai_grading.feedback
}
},
"question_summary": {
"type": question_obj.question_type,
"text": question_obj.question,
"reference_answer": question_obj.correct_answer,
"metadata": question_obj.metadata.model_dump() if question_obj.metadata else None,
"attachments": [att.model_dump() for att in (question_obj.attachments or [])]
},
"student_response": {
# return original fields where possible and include transcription info if applicable
"original_submission": student_response,
"answer": student_answer_for_grading
}
}
if transcription_info:
response["student_response"]["transcription"] = transcription_info
return response
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Evaluation error: {str(e)}")
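
# Example request body for POST /api/qa/evaluate (hypothetical values):
#
#   {
#     "question": {
#       "question_type": "audio_transcribe",
#       "question": "Describe the water cycle.",
#       "correct_answer": "Evaporation, condensation, precipitation"
#     },
#     "student_answer": {
#       "audio_url": "https://example.com/student_recording.mp3"
#     }
#   }
#
# For non-audio types, "student_answer" can be {"answer": "..."} or a bare
# primitive such as true or "Paris".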
@app.post("/api/qa/evaluate_local")
async def evaluate_question_and_answer_local(
question: str = Form(..., description="Question JSON string"),
audio_file: UploadFile = File(..., description="Audio file with student's answer"),
model_size: str = Form("small", description="Whisper model size: tiny, base, small, medium, large")
):
"""
Local evaluation endpoint using faster_whisper for transcription.
Accepts form-data:
- question: JSON string with question details (same format as /api/qa/evaluate)
- audio_file: Audio file containing student's spoken answer
- model_size: Optional whisper model size (default: small)
Example curl:
curl -X POST http://localhost:8000/api/qa/evaluate_local \
-F 'question={"question_type":"audio_transcribe","question":"What is AI?","correct_answer":"Artificial Intelligence"}' \
-F 'audio_file=@student_answer.mp3' \
-F 'model_size=small'
"""
try:
# Parse question JSON
try:
question_data = json.loads(question)
except json.JSONDecodeError as e:
raise HTTPException(status_code=400, detail=f"Invalid question JSON: {str(e)}")
if not question_data:
raise HTTPException(status_code=400, detail="Missing 'question' data")
# Backwards compatibility: allow "answer" key in question input
if "answer" in question_data:
question_data["correct_answer"] = question_data["answer"]
# Validate structure via Pydantic
try:
question_obj = QuestionRequest(**question_data)
except Exception as e:
raise HTTPException(status_code=422, detail=f"Invalid question format: {str(e)}")
# Transcribe audio locally using faster_whisper
transcription = await transcribe_audio_local(audio_file, model_size)
student_answer_text = transcription["text"]
transcription_info = {
"source": "local_whisper",
"model_size": model_size,
"language": transcription.get("language"),
"raw_response": transcription.get("raw")
}
# Step 1 — AI validation
ai_validation_raw = validate_question_with_ai(question_obj.model_dump())
ai_validation = AIValidation(**ai_validation_raw)
# Step 2 — AI grading
ai_grading_raw = grade_answer_with_ai(question_obj.model_dump(), student_answer_text)
ai_grading = AIGrading(**ai_grading_raw)
# Step 3 — Structured response
response = {
"status": "success",
"evaluation": {
"validation": {
"is_valid": ai_validation.is_valid,
"issues": [] if ai_validation.is_valid else ["Detected issues in question content"],
"suggestions": [] if not ai_validation.suggestions else [ai_validation.suggestions]
},
"grading": {
"ai_score": ai_grading.ai_score,
"total_marks": ai_grading.total_marks,
"is_correct": ai_grading.is_correct,
"feedback": ai_grading.feedback
}
},
"question_summary": {
"type": question_obj.question_type,
"text": question_obj.question,
"reference_answer": question_obj.correct_answer,
"metadata": question_obj.metadata.model_dump() if question_obj.metadata else None,
"attachments": [att.model_dump() for att in (question_obj.attachments or [])]
},
"student_response": {
"answer": student_answer_text,
"transcription": transcription_info
}
}
return response
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Evaluation error: {str(e)}")
if __name__ == "__main__":
import uvicorn
print("=" * 60)
print("Educational Question API Server v3.0")
print("=" * 60)
print(f"Gemini API Key: {'✓ Configured' if os.environ.get('GEMINI_API_KEY') else '✗ NOT SET'}")
print(f"Model: gemini-flash-lite-latest")
print(f"Server: http://localhost:8000")
print(f"Docs: http://localhost:8000/docs")
print(f"Endpoints: /api/qa/evaluate, /api/qa/evaluate_local")
print("=" * 60)
uvicorn.run(app, host="0.0.0.0", port=8000)