from fastapi import FastAPI, HTTPException, UploadFile, Form, File
from pydantic import BaseModel, Field, field_validator
from typing import List, Dict, Union, Optional, Any
from enum import Enum
import os
from google import genai
from google.genai import types
import json
from dotenv import load_dotenv
import requests
from faster_whisper import WhisperModel
import uuid
from pathlib import Path
import aiofiles
import tempfile
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("Missing GROQ_API_KEY in environment variables")
app = FastAPI(
    title="Educational Question API",
    description="API for question evaluation and grading",
    version="3.0.0"
)
# Enums
class QuestionType(str, Enum):
    sentence = "sentence"
    short = "short"
    fill_blank = "fill_blank"
    select = "select"
    multi_select = "multi_select"
    true_false = "true_false"
    matching = "matching"
    ordering = "ordering"
    image_labeling = "image_labeling"
    code_output = "code_output"
    code_debug = "code_debug"
    audio_transcribe = "audio_transcribe"
    video_question = "video_question"
    drag_drop = "drag_drop"
    math_expression = "math_expression"
    diagram = "diagram"
class Attachment(BaseModel):
    type: str
    url: Optional[str] = None
    content: Optional[str] = None
class Metadata(BaseModel):
    min_words: Optional[int] = None
    max_words: Optional[int] = None
    language: Optional[str] = "en"
    difficulty: Optional[str] = "medium"
    total_marks: Optional[int] = 100
class QuestionRequest(BaseModel):
    question_type: QuestionType
    question: str
    correct_answer: Union[str, List[str], Dict[str, Any], bool] = Field(
        ...,
        description="The correct/reference answer for this question"
    )
    options: Optional[Union[List[Any], Dict[str, Any]]] = []
    metadata: Optional[Metadata] = None
    attachments: Optional[List[Attachment]] = []
    context: Optional[str] = Field(
        None,
        description="Additional context to help AI understand domain-specific knowledge"
    )

    @field_validator('correct_answer')
    @classmethod
    def validate_answer(cls, v, info):
        q_type = info.data.get('question_type')
        if q_type is None:
            return v
        if q_type == QuestionType.true_false and not isinstance(v, bool):
            raise ValueError("Answer must be boolean for true_false type")
        if q_type in [QuestionType.multi_select, QuestionType.ordering] and not isinstance(v, list):
            raise ValueError(f"Answer must be a list for {q_type} type")
        if q_type in [QuestionType.matching, QuestionType.image_labeling] and not isinstance(v, dict):
            raise ValueError(f"Answer must be a dictionary for {q_type} type")
        return v
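
# A minimal sketch of the request model in use (hypothetical values, kept as a
# commented example so importing this module stays side-effect free):
#
#   q = QuestionRequest(
#       question_type=QuestionType.true_false,
#       question="The Earth orbits the Sun.",
#       correct_answer=True,
#       metadata=Metadata(total_marks=10),
#   )
#   # The validator rejects type mismatches, e.g. a string answer for a
#   # true_false question raises a pydantic ValidationError.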
class AIValidation(BaseModel):
    is_valid: bool
    suggestions: Optional[str] = None

class AIGrading(BaseModel):
    ai_score: float
    total_marks: int
    feedback: Optional[str] = None
    is_correct: bool
# Define binary vs subjective question types
BINARY_TYPES = [
    QuestionType.short,
    QuestionType.fill_blank,
    QuestionType.select,
    QuestionType.true_false,
    QuestionType.matching,
    QuestionType.ordering,
    QuestionType.code_output,
    QuestionType.image_labeling,
    QuestionType.multi_select
]
SUBJECTIVE_TYPES = [
    QuestionType.sentence,
    QuestionType.code_debug,
    QuestionType.math_expression
]
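# Note: question types missing from both lists (e.g. audio_transcribe,
# video_question, drag_drop, diagram) fall through to the subjective grading
# branch in grade_answer_with_ai below.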
def validate_question_with_ai(question_data: dict) -> dict:
    """
    Validates if a question is well-formed.
    Returns: {"is_valid": bool, "suggestions": str or null}
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return {
            "is_valid": True,
            "suggestions": "AI validation skipped - GEMINI_API_KEY not configured"
        }
    question_type = question_data.get("question_type")
    context = question_data.get("context", "")
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-flash-lite-latest"
        question_text = question_data.get("question", "")
        correct_answer = question_data.get("correct_answer", "")
        options = question_data.get("options") or []
        attachments = question_data.get("attachments") or []
        context_section = ""
        if context and context.strip():
            context_section = f"\n## CONTEXT:\n{context.strip()}\n"
        system_instruction = """You are an educational content validator.
Validate if the question is:
- Clear and unambiguous
- Grammatically correct
- Has proper formatting
- Includes correct answer in options (if applicable)
- Factually accurate
Return ONLY valid JSON:
{
    "is_valid": <true/false>,
    "suggestions": "<text or null>"
}
"""
        prompt_text = f"""Validate this educational question:
**Type:** {question_type}
**Question:** {question_text}
**Correct Answer:** {json.dumps(correct_answer, ensure_ascii=False)}
**Options:** {json.dumps(options, ensure_ascii=False)}
**Attachments:** {json.dumps(attachments, ensure_ascii=False)}
{context_section}
Return validation result in JSON format."""
        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt_text)])]
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
            system_instruction=[types.Part.from_text(text=system_instruction)]
        )
        response_parts = []
        for chunk in client.models.generate_content_stream(model=model, contents=contents, config=config):
            if text := getattr(chunk, "text", None):
                response_parts.append(text)
        response_text = "".join(response_parts).strip()
        if response_text:
            parsed = json.loads(response_text)
            return {
                "is_valid": parsed.get("is_valid", True),
                "suggestions": parsed.get("suggestions")
            }
        return {"is_valid": True, "suggestions": None}
    except Exception as e:
        return {"is_valid": True, "suggestions": f"Validation error: {str(e)}"}
def grade_answer_with_ai(question_data: dict, student_answer: Any) -> dict:
    """
    Grades a student's answer against the correct answer.
    Returns: {
        "ai_score": float,
        "total_marks": int,
        "feedback": str or null,
        "is_correct": bool
    }
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    metadata = question_data.get("metadata") or {}
    total_marks = metadata.get("total_marks", 100) if isinstance(metadata, dict) else 100
    if not api_key:
        return {
            "ai_score": 0,
            "total_marks": total_marks,
            "feedback": "AI grading unavailable - GEMINI_API_KEY not configured",
            "is_correct": False
        }
    question_type = question_data.get("question_type")
    context = question_data.get("context", "")
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-flash-lite-latest"
        question_text = question_data.get("question", "")
        correct_answer = question_data.get("correct_answer", "")
        attachments = question_data.get("attachments") or []
        context_section = ""
        if context and context.strip():
            context_section = f"\n## CONTEXT:\n{context.strip()}\n"
        # Different grading logic for binary vs subjective
        if question_type in BINARY_TYPES:
            system_instruction = """You are an educational answer grader for EXACT-MATCH questions.
For binary question types (select, true_false, fill_blank, short, matching, ordering, code_output, image_labeling, multi_select):
- Compare student answer with correct answer
- Award full marks if correct, 0 if incorrect
- Be strict but account for minor formatting differences
- For text answers, ignore case and extra whitespace
- For lists/arrays, order matters unless it's multi_select
Return ONLY valid JSON:
{
    "ai_score": <0 or total_marks>,
    "total_marks": <number>,
    "feedback": "<optional text>",
    "is_correct": <true/false>
}
"""
            prompt_text = f"""Grade this student answer:
**Question Type:** {question_type}
**Question:** {question_text}
**Correct Answer:** {json.dumps(correct_answer, ensure_ascii=False)}
**Student Answer:** {json.dumps(student_answer, ensure_ascii=False)}
**Total Marks:** {total_marks}
{context_section}
Compare and grade the student answer."""
        else:  # SUBJECTIVE_TYPES
            system_instruction = """You are an educational answer grader for SUBJECTIVE questions.
For subjective question types (sentence, code_debug, math_expression):
- Compare student answer with correct/reference answer
- Award partial credit based on quality (0 to total_marks)
- Consider: accuracy, completeness, clarity, depth
- Check word count requirements if specified
- Provide constructive feedback
Return ONLY valid JSON:
{
    "ai_score": <number>,
    "total_marks": <number>,
    "feedback": "<text>",
    "is_correct": <true if score >= 70% of total_marks, else false>
}
Scoring guidelines:
- 90-100%: Excellent, comprehensive answer
- 70-89%: Good answer with minor gaps
- 50-69%: Acceptable but incomplete
- Below 50%: Significant issues or incorrect
"""
            prompt_text = f"""Grade this student answer:
**Question Type:** {question_type}
**Question:** {question_text}
**Reference Answer:** {json.dumps(correct_answer, ensure_ascii=False)}
**Student Answer:** {json.dumps(student_answer, ensure_ascii=False)}
**Metadata:** {json.dumps(metadata, ensure_ascii=False)}
**Total Marks:** {total_marks}
{context_section}
Evaluate the student answer quality and provide detailed feedback."""
        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt_text)])]
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
            system_instruction=[types.Part.from_text(text=system_instruction)]
        )
        response_parts = []
        for chunk in client.models.generate_content_stream(model=model, contents=contents, config=config):
            if text := getattr(chunk, "text", None):
                response_parts.append(text)
        response_text = "".join(response_parts).strip()
        if not response_text:
            return {
                "ai_score": 0,
                "total_marks": total_marks,
                "feedback": "Empty AI response",
                "is_correct": False
            }
        try:
            parsed = json.loads(response_text)
            ai_score = float(parsed.get("ai_score", 0))
            ai_score = max(0, min(ai_score, total_marks))  # clamp into [0, total_marks]
            feedback = parsed.get("feedback")
            if feedback in [None, "null", "", "None"]:
                feedback = None
            is_correct = parsed.get("is_correct", ai_score >= (total_marks * 0.7))
            return {
                "ai_score": ai_score,
                "total_marks": total_marks,
                "feedback": feedback,
                "is_correct": bool(is_correct)
            }
        except (json.JSONDecodeError, ValueError) as e:
            return {
                "ai_score": 0,
                "total_marks": total_marks,
                "feedback": f"Failed to parse AI response: {str(e)}",
                "is_correct": False
            }
    except Exception as e:
        return {
            "ai_score": 0,
            "total_marks": total_marks,
            "feedback": f"Grading error: {str(e)}",
            "is_correct": False
        }
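
# Sketch of a direct grading call (hypothetical data). Binary types are scored
# all-or-nothing; subjective types can earn partial credit. The returned dict
# always matches the AIGrading shape:
#
#   result = grade_answer_with_ai(
#       {
#           "question_type": "short",
#           "question": "What does CPU stand for?",
#           "correct_answer": "Central Processing Unit",
#           "metadata": {"total_marks": 10},
#       },
#       "central processing unit",
#   )
#   # e.g. {"ai_score": 10.0, "total_marks": 10, "feedback": None, "is_correct": True}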
def transcribe_audio_groq(audio_url: str, model: str = "whisper-large-v3-turbo", response_format: str = "verbose_json", timeout: int = 60) -> dict:
    """
    Transcribe audio located at `audio_url` using the GROQ API.
    Returns a dict: {"text": <string>, "language": <string>, "raw": <dict>}
    Raises HTTPException on failure.
    """
    if not audio_url:
        raise HTTPException(status_code=400, detail="audio_url is required for transcription")
    groq_url = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    files = {
        "model": (None, model),
        "url": (None, audio_url),
        "temperature": (None, "0"),
        "response_format": (None, response_format),
    }
    try:
        resp = requests.post(groq_url, headers=headers, files=files, timeout=timeout)
    except requests.RequestException as e:
        raise HTTPException(status_code=502, detail=f"GROQ transcription request failed: {str(e)}")
    if resp.status_code != 200:
        raise HTTPException(status_code=502, detail=f"GROQ transcription failed ({resp.status_code}): {resp.text}")
    try:
        data = resp.json()
    except ValueError:
        raise HTTPException(status_code=502, detail="GROQ transcription returned non-JSON response")
    # Extract text from response
    text = None
    if isinstance(data, dict):
        text = data.get("text") or data.get("transcription") or data.get("transcribed_text")
        if not text and data.get("segments") and isinstance(data["segments"], list):
            text = " ".join([seg.get("text", "").strip() for seg in data["segments"] if seg.get("text")])
        if not text:
            # Last resort: pick the first string field that looks like a sentence
            for k, v in data.items():
                if isinstance(v, str) and len(v) > 0:
                    if len(v.split()) > 2:
                        text = v
                        break
    if not text:
        raise HTTPException(status_code=502, detail="Unable to extract transcription text from GROQ response")
    language = None
    if isinstance(data, dict):
        language = data.get("language") or data.get("detected_language")
    return {"text": text, "language": language, "raw": data}
# Local transcription setup
UPLOAD_DIR = Path(tempfile.gettempdir()) / "audio_uploads"
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
_model_cache = {}
def get_model(model_size="small"):
"""Get or create cached Whisper model"""
device = "cuda" if os.environ.get("SPACE_TYPE") == "PRO" else "cpu"
compute_type = "int8_float16" if device == "cuda" else "int8"
key = (model_size, device, compute_type)
if key not in _model_cache:
_model_cache[key] = WhisperModel(
model_size,
device=device,
compute_type=compute_type,
download_root=str(Path(os.environ.get("HF_HOME", "/tmp/huggingface")) / "hub")
)
return _model_cache[key]
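
# The cache key includes device and compute type, so the same model size
# requested twice in one process returns the same cached instance:
#
#   m1 = get_model("small")
#   m2 = get_model("small")
#   assert m1 is m2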
async def transcribe_audio_local(file: UploadFile, model_size: str = "small") -> dict:
    """Transcribe audio file locally using faster_whisper"""
    if not file.content_type or not file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Only audio files allowed")
    temp_file = UPLOAD_DIR / f"{uuid.uuid4().hex}_{file.filename}"
    async with aiofiles.open(temp_file, 'wb') as f:
        while chunk := await file.read(8192):
            await f.write(chunk)
    try:
        model = get_model(model_size)
        segments, info = model.transcribe(str(temp_file), beam_size=5)
        # model.transcribe returns a generator; materialize it once so the
        # segments can be reused for both the joined text and the raw payload.
        segments = list(segments)
        full_text = "\n".join(seg.text.strip() for seg in segments)
        return {
            "text": full_text,
            "language": info.language,
            "raw": {
                "segments": [s._asdict() for s in segments],
                "language": info.language
            }
        }
    finally:
        temp_file.unlink(missing_ok=True)
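
# Sketch of calling the local transcriber from another async context
# (hypothetical upload object):
#
#   result = await transcribe_audio_local(upload, model_size="base")
#   result["text"]  # newline-joined segment texts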
# Updated evaluate endpoint: supports audio_transcribe question type with audio_url / audio_answer_path
@app.post("/api/qa/evaluate")
async def evaluate_question_and_answer(payload: dict):
"""
Unified endpoint — accepts a question + student answer JSON and returns
validation + grading + structured output. Supports all question types,
including audio_transcribe (accepts audio_url or audio_answer_path and will call GROQ to transcribe).
"""
try:
question_data = payload.get("question")
student_response = payload.get("student_answer")
if not question_data or not student_response:
raise HTTPException(status_code=400, detail="Missing 'question' or 'student_answer' field")
# Backwards compatibility: allow "answer" key in question input
if "answer" in question_data:
question_data["correct_answer"] = question_data["answer"]
# Validate structure via Pydantic
try:
question_obj = QuestionRequest(**question_data)
except Exception as e:
raise HTTPException(status_code=422, detail=f"Invalid question format: {str(e)}")
# Decide how to obtain the student's textual answer
student_answer_text = None
transcription_info = None
# If question type is audio_transcribe, accept audio_url or audio_answer_path and transcribe if needed
if question_obj.question_type == QuestionType.audio_transcribe:
# Prefer explicit provided 'answer' text if present
provided_text = None
if isinstance(student_response, dict):
provided_text = student_response.get("answer") or student_response.get("transcribed_text")
if provided_text and isinstance(provided_text, str) and provided_text.strip():
student_answer_text = provided_text.strip()
transcription_info = {"source": "provided_text", "audio_url": student_response.get("audio_url") or student_response.get("audio_answer_path")}
else:
# Look for audio URL fields
audio_url = None
if isinstance(student_response, dict):
audio_url = student_response.get("audio_url") or student_response.get("audio_answer_path") or student_response.get("audio_path")
if not audio_url:
raise HTTPException(status_code=400, detail="audio_transcribe question requires 'audio_url' or 'audio_answer_path' in student_answer when no text 'answer' provided")
# Transcribe using GROQ
transcription = transcribe_audio_groq(audio_url)
student_answer_text = transcription["text"]
transcription_info = {
"source": "groq_transcription",
"audio_url": audio_url,
"language": transcription.get("language"),
"raw_response": transcription.get("raw")
}
else:
# Non-audio types: accept 'answer' field directly. If student_response is just a primitive, try that.
if isinstance(student_response, dict):
# standard field name 'answer' or fallback to top-level fields
if "answer" in student_response:
student_answer_text = student_response.get("answer")
elif "student_answer" in student_response:
student_answer_text = student_response.get("student_answer")
else:
# if the student_response itself is the answer object (for multi-part answers), pass it through
student_answer_text = student_response.get("answer", student_response)
else:
student_answer_text = student_response
# Normalize a single primitive answer out of containers when possible
# For grading, we pass raw value (string/list/dict/bool) as expected by grade_answer_with_ai
student_answer_for_grading = student_answer_text
# Step 1 — AI validation (use the question object as provided)
ai_validation_raw = validate_question_with_ai(question_obj.model_dump())
ai_validation = AIValidation(**ai_validation_raw)
# Step 2 — AI grading (use question dict and extracted student answer)
ai_grading_raw = grade_answer_with_ai(question_obj.model_dump(), student_answer_for_grading)
ai_grading = AIGrading(**ai_grading_raw)
# Step 3 — Structured response
response = {
"status": "success",
"evaluation": {
"validation": {
"is_valid": ai_validation.is_valid,
"issues": [] if ai_validation.is_valid else ["Detected issues in question content"],
"suggestions": [] if not ai_validation.suggestions else [ai_validation.suggestions]
},
"grading": {
"ai_score": ai_grading.ai_score,
"total_marks": ai_grading.total_marks,
"is_correct": ai_grading.is_correct,
"feedback": ai_grading.feedback
}
},
"question_summary": {
"type": question_obj.question_type,
"text": question_obj.question,
"reference_answer": question_obj.correct_answer,
"metadata": question_obj.metadata.model_dump() if question_obj.metadata else None,
"attachments": [att.model_dump() for att in (question_obj.attachments or [])]
},
"student_response": {
# return original fields where possible and include transcription info if applicable
"original_submission": student_response,
"answer": student_answer_for_grading
}
}
if transcription_info:
response["student_response"]["transcription"] = transcription_info
return response
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Evaluation error: {str(e)}")
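
# Example request body for POST /api/qa/evaluate (hypothetical values):
#
#   {
#     "question": {
#       "question_type": "audio_transcribe",
#       "question": "Describe the water cycle.",
#       "correct_answer": "Evaporation, condensation, precipitation"
#     },
#     "student_answer": {
#       "audio_url": "https://example.com/student_recording.mp3"
#     }
#   }
#
# For non-audio types, "student_answer" can be {"answer": "..."} or a bare
# primitive such as true or "Paris".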
@app.post("/api/qa/evaluate_local")
async def evaluate_question_and_answer_local(
question: str = Form(..., description="Question JSON string"),
audio_file: UploadFile = File(..., description="Audio file with student's answer"),
model_size: str = Form("small", description="Whisper model size: tiny, base, small, medium, large")
):
"""
Local evaluation endpoint using faster_whisper for transcription.
Accepts form-data:
- question: JSON string with question details (same format as /api/qa/evaluate)
- audio_file: Audio file containing student's spoken answer
- model_size: Optional whisper model size (default: small)
Example curl:
curl -X POST http://localhost:8000/api/qa/evaluate_local \
-F 'question={"question_type":"audio_transcribe","question":"What is AI?","correct_answer":"Artificial Intelligence"}' \
-F 'audio_file=@student_answer.mp3' \
-F 'model_size=small'
"""
try:
# Parse question JSON
try:
question_data = json.loads(question)
except json.JSONDecodeError as e:
raise HTTPException(status_code=400, detail=f"Invalid question JSON: {str(e)}")
if not question_data:
raise HTTPException(status_code=400, detail="Missing 'question' data")
# Backwards compatibility: allow "answer" key in question input
if "answer" in question_data:
question_data["correct_answer"] = question_data["answer"]
# Validate structure via Pydantic
try:
question_obj = QuestionRequest(**question_data)
except Exception as e:
raise HTTPException(status_code=422, detail=f"Invalid question format: {str(e)}")
# Transcribe audio locally using faster_whisper
transcription = await transcribe_audio_local(audio_file, model_size)
student_answer_text = transcription["text"]
transcription_info = {
"source": "local_whisper",
"model_size": model_size,
"language": transcription.get("language"),
"raw_response": transcription.get("raw")
}
# Step 1 — AI validation
ai_validation_raw = validate_question_with_ai(question_obj.model_dump())
ai_validation = AIValidation(**ai_validation_raw)
# Step 2 — AI grading
ai_grading_raw = grade_answer_with_ai(question_obj.model_dump(), student_answer_text)
ai_grading = AIGrading(**ai_grading_raw)
# Step 3 — Structured response
response = {
"status": "success",
"evaluation": {
"validation": {
"is_valid": ai_validation.is_valid,
"issues": [] if ai_validation.is_valid else ["Detected issues in question content"],
"suggestions": [] if not ai_validation.suggestions else [ai_validation.suggestions]
},
"grading": {
"ai_score": ai_grading.ai_score,
"total_marks": ai_grading.total_marks,
"is_correct": ai_grading.is_correct,
"feedback": ai_grading.feedback
}
},
"question_summary": {
"type": question_obj.question_type,
"text": question_obj.question,
"reference_answer": question_obj.correct_answer,
"metadata": question_obj.metadata.model_dump() if question_obj.metadata else None,
"attachments": [att.model_dump() for att in (question_obj.attachments or [])]
},
"student_response": {
"answer": student_answer_text,
"transcription": transcription_info
}
}
return response
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Evaluation error: {str(e)}")
if __name__ == "__main__":
import uvicorn
print("=" * 60)
print("Educational Question API Server v3.0")
print("=" * 60)
print(f"Gemini API Key: {'✓ Configured' if os.environ.get('GEMINI_API_KEY') else '✗ NOT SET'}")
print(f"Model: gemini-flash-lite-latest")
print(f"Server: http://localhost:8000")
print(f"Docs: http://localhost:8000/docs")
print(f"Endpoints: /api/qa/evaluate, /api/qa/evaluate_local")
print("=" * 60)
uvicorn.run(app, host="0.0.0.0", port=8000)