Spaces:

triflix
/

answerevaluate

Sleeping

File size: 27,187 Bytes

74ba611

from fastapi import FastAPI, HTTPException, UploadFile, Form, File
from pydantic import BaseModel, Field, field_validator
from typing import List, Dict, Union, Optional, Any
from enum import Enum
import os
from google import genai
from google.genai import types
import json
from dotenv import load_dotenv
import requests
from faster_whisper import WhisperModel
import uuid
from pathlib import Path
import aiofiles
import tempfile

load_dotenv()

GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("Missing GROQ_API_KEY in environment variables")

app = FastAPI(
    title="Educational Question API",
    description="API for question evaluation and grading",
    version="3.0.0"
)

# Enums
class QuestionType(str, Enum):
    sentence = "sentence"
    short = "short"
    fill_blank = "fill_blank"
    select = "select"
    multi_select = "multi_select"
    true_false = "true_false"
    matching = "matching"
    ordering = "ordering"
    image_labeling = "image_labeling"
    code_output = "code_output"
    code_debug = "code_debug"
    audio_transcribe = "audio_transcribe"
    video_question = "video_question"
    drag_drop = "drag_drop"
    math_expression = "math_expression"
    diagram = "diagram"

class Attachment(BaseModel):
    type: str
    url: Optional[str] = None
    content: Optional[str] = None

class Metadata(BaseModel):
    min_words: Optional[int] = None
    max_words: Optional[int] = None
    language: Optional[str] = "en"
    difficulty: Optional[str] = "medium"
    total_marks: Optional[int] = 100

class QuestionRequest(BaseModel):
    question_type: QuestionType
    question: str
    correct_answer: Union[str, List[str], Dict[str, Any], bool] = Field(
        ...,
        description="The correct/reference answer for this question"
    )
    options: Optional[Union[List[Any], Dict[str, Any]]] = []
    metadata: Optional[Metadata] = None
    attachments: Optional[List[Attachment]] = []
    context: Optional[str] = Field(
        None,
        description="Additional context to help AI understand domain-specific knowledge"
    )

    @field_validator('correct_answer')
    @classmethod
    def validate_answer(cls, v, info):
        q_type = info.data.get('question_type')
        if q_type is None:
            return v
        if q_type == QuestionType.true_false and not isinstance(v, bool):
            raise ValueError("Answer must be boolean for true_false type")
        if q_type in [QuestionType.multi_select, QuestionType.ordering] and not isinstance(v, list):
            raise ValueError(f"Answer must be a list for {q_type} type")
        if q_type in [QuestionType.matching, QuestionType.image_labeling] and not isinstance(v, dict):
            raise ValueError(f"Answer must be a dictionary for {q_type} type")
        return v

class AIValidation(BaseModel):
    is_valid: bool
    suggestions: Optional[str] = None

class AIGrading(BaseModel):
    ai_score: float
    total_marks: int
    feedback: Optional[str] = None
    is_correct: bool

# Define binary vs subjective question types
BINARY_TYPES = [
    QuestionType.short,
    QuestionType.fill_blank,
    QuestionType.select,
    QuestionType.true_false,
    QuestionType.matching,
    QuestionType.ordering,
    QuestionType.code_output,
    QuestionType.image_labeling,
    QuestionType.multi_select
]

SUBJECTIVE_TYPES = [
    QuestionType.sentence,
    QuestionType.code_debug,
    QuestionType.math_expression
]

def validate_question_with_ai(question_data: dict) -> dict:
    """

    Validates if a question is well-formed.

    Returns: {"is_valid": bool, "suggestions": str or null}

    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return {
            "is_valid": True,
            "suggestions": "AI validation skipped - GEMINI_API_KEY not configured"
        }
    
    question_type = question_data.get("question_type")
    context = question_data.get("context", "")
    
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-flash-lite-latest"
        
        question_text = question_data.get("question", "")
        correct_answer = question_data.get("correct_answer", "")
        options = question_data.get("options") or []
        attachments = question_data.get("attachments") or []
        
        context_section = ""
        if context and context.strip():
            context_section = f"\n## CONTEXT:\n{context.strip()}\n"
        
        system_instruction = """You are an educational content validator.

Validate if the question is:

- Clear and unambiguous

- Grammatically correct

- Has proper formatting

- Includes correct answer in options (if applicable)

- Factually accurate



Return ONLY valid JSON:

{

  "is_valid": <true/false>,

  "suggestions": "<text or null>"

}

"""
        
        prompt_text = f"""Validate this educational question:

**Type:** {question_type}

**Question:** {question_text}

**Correct Answer:** {json.dumps(correct_answer, ensure_ascii=False)}

**Options:** {json.dumps(options, ensure_ascii=False)}

**Attachments:** {json.dumps(attachments, ensure_ascii=False)}

{context_section}

Return validation result in JSON format."""

        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt_text)])]
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
            system_instruction=[types.Part.from_text(text=system_instruction)]
        )
        
        response_parts = []
        for chunk in client.models.generate_content_stream(model=model, contents=contents, config=config):
            if text := getattr(chunk, "text", None):
                response_parts.append(text)
        
        response_text = "".join(response_parts).strip()
        if response_text:
            parsed = json.loads(response_text)
            return {
                "is_valid": parsed.get("is_valid", True),
                "suggestions": parsed.get("suggestions")
            }
        
        return {"is_valid": True, "suggestions": None}
    
    except Exception as e:
        return {"is_valid": True, "suggestions": f"Validation error: {str(e)}"}

def grade_answer_with_ai(question_data: dict, student_answer: Any) -> dict:
    """

    Grades a student's answer against the correct answer.

    Returns: {

      "ai_score": float,

      "total_marks": int,

      "feedback": str or null,

      "is_correct": bool

    }

    """
    api_key = os.environ.get("GEMINI_API_KEY")
    metadata = question_data.get("metadata") or {}
    total_marks = metadata.get("total_marks", 100) if isinstance(metadata, dict) else 100
    
    if not api_key:
        return {
            "ai_score": 0,
            "total_marks": total_marks,
            "feedback": "AI grading unavailable - GEMINI_API_KEY not configured",
            "is_correct": False
        }
    
    question_type = question_data.get("question_type")
    context = question_data.get("context", "")
    
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-flash-lite-latest"
        
        question_text = question_data.get("question", "")
        correct_answer = question_data.get("correct_answer", "")
        attachments = question_data.get("attachments") or []
        
        context_section = ""
        if context and context.strip():
            context_section = f"\n## CONTEXT:\n{context.strip()}\n"
        
        # Different grading logic for binary vs subjective
        if question_type in BINARY_TYPES:
            system_instruction = """You are an educational answer grader for EXACT-MATCH questions.

For binary question types (select, true_false, fill_blank, short, matching, ordering, code_output, image_labeling, multi_select):

- Compare student answer with correct answer

- Award full marks if correct, 0 if incorrect

- Be strict but account for minor formatting differences

- For text answers, ignore case and extra whitespace

- For lists/arrays, order matters unless it's multi_select



Return ONLY valid JSON:

{

  "ai_score": <0 or total_marks>,

  "total_marks": <number>,

  "feedback": "<optional text>",

  "is_correct": <true/false>

}

"""
            
            prompt_text = f"""Grade this student answer:

**Question Type:** {question_type}

**Question:** {question_text}

**Correct Answer:** {json.dumps(correct_answer, ensure_ascii=False)}

**Student Answer:** {json.dumps(student_answer, ensure_ascii=False)}

**Total Marks:** {total_marks}

{context_section}

Compare and grade the student answer."""
        
        else:  # SUBJECTIVE_TYPES
            system_instruction = """You are an educational answer grader for SUBJECTIVE questions.

For subjective question types (sentence, code_debug, math_expression):

- Compare student answer with correct/reference answer

- Award partial credit based on quality (0 to total_marks)

- Consider: accuracy, completeness, clarity, depth

- Check word count requirements if specified

- Provide constructive feedback



Return ONLY valid JSON:

{

  "ai_score": <number>,

  "total_marks": <number>,

  "feedback": "<text>",

  "is_correct": <true if score >= 70% of total_marks, else false>

}



Scoring guidelines:

- 90-100%: Excellent, comprehensive answer

- 70-89%: Good answer with minor gaps

- 50-69%: Acceptable but incomplete

- Below 50%: Significant issues or incorrect

"""
            
            prompt_text = f"""Grade this student answer:

**Question Type:** {question_type}

**Question:** {question_text}

**Reference Answer:** {json.dumps(correct_answer, ensure_ascii=False)}

**Student Answer:** {json.dumps(student_answer, ensure_ascii=False)}

**Metadata:** {json.dumps(metadata, ensure_ascii=False)}

**Total Marks:** {total_marks}

{context_section}

Evaluate the student answer quality and provide detailed feedback."""
        
        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt_text)])]
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
            system_instruction=[types.Part.from_text(text=system_instruction)]
        )
        
        response_parts = []
        for chunk in client.models.generate_content_stream(model=model, contents=contents, config=config):
            if text := getattr(chunk, "text", None):
                response_parts.append(text)
        
        response_text = "".join(response_parts).strip()
        
        if not response_text:
            return {
                "ai_score": 0,
                "total_marks": total_marks,
                "feedback": "Empty AI response",
                "is_correct": False
            }
        
        try:
            parsed = json.loads(response_text)
            ai_score = float(parsed.get("ai_score", 0))
            ai_score = max(0, min(ai_score, total_marks))
            
            feedback = parsed.get("feedback")
            if feedback in [None, "null", "", "None"]:
                feedback = None
            
            is_correct = parsed.get("is_correct", ai_score >= (total_marks * 0.7))
            
            return {
                "ai_score": ai_score,
                "total_marks": total_marks,
                "feedback": feedback,
                "is_correct": bool(is_correct)
            }
        
        except (json.JSONDecodeError, ValueError) as e:
            return {
                "ai_score": 0,
                "total_marks": total_marks,
                "feedback": f"Failed to parse AI response: {str(e)}",
                "is_correct": False
            }
    
    except Exception as e:
        return {
            "ai_score": 0,
            "total_marks": total_marks,
            "feedback": f"Grading error: {str(e)}",
            "is_correct": False
        }

def transcribe_audio_groq(audio_url: str, model: str = "whisper-large-v3-turbo", response_format: str = "verbose_json", timeout: int = 60) -> dict:
    """

    Transcribe audio located at `audio_url` using GROQ API.

    Returns a dict: {"text": <string>, "language": <string>, "raw": <dict>}

    Raises HTTPException on failure.

    """
    if not audio_url:
        raise HTTPException(status_code=400, detail="audio_url is required for transcription")
    
    groq_url = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    files = {
        "model": (None, model),
        "url": (None, audio_url),
        "temperature": (None, "0"),
        "response_format": (None, response_format),
    }
    
    try:
        resp = requests.post(groq_url, headers=headers, files=files, timeout=timeout)
    except requests.RequestException as e:
        raise HTTPException(status_code=502, detail=f"GROQ transcription request failed: {str(e)}")
    
    if resp.status_code != 200:
        raise HTTPException(status_code=502, detail=f"GROQ transcription failed ({resp.status_code}): {resp.text}")
    
    try:
        data = resp.json()
    except ValueError:
        raise HTTPException(status_code=502, detail="GROQ transcription returned non-json response")
    
    # Extract text from response
    text = None
    if isinstance(data, dict):
        text = data.get("text") or data.get("transcription") or data.get("transcribed_text")
        
        if not text and data.get("segments") and isinstance(data["segments"], list):
            text = " ".join([seg.get("text", "").strip() for seg in data["segments"] if seg.get("text")])
        
        if not text:
            for k, v in data.items():
                if isinstance(v, str) and len(v) > 0:
                    if len(v.split()) > 2:
                        text = v
                        break
    
    if not text:
        raise HTTPException(status_code=502, detail="Unable to extract transcription text from GROQ response")
    
    language = None
    if isinstance(data, dict):
        language = data.get("language") or data.get("detected_language")
    
    return {"text": text, "language": language, "raw": data}

# Local transcription setup
UPLOAD_DIR = Path(tempfile.gettempdir()) / "audio_uploads"
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

_model_cache = {}

def get_model(model_size="small"):
    """Get or create cached Whisper model"""
    device = "cuda" if os.environ.get("SPACE_TYPE") == "PRO" else "cpu"
    compute_type = "int8_float16" if device == "cuda" else "int8"
    key = (model_size, device, compute_type)
    if key not in _model_cache:
        _model_cache[key] = WhisperModel(
            model_size,
            device=device,
            compute_type=compute_type,
            download_root=str(Path(os.environ.get("HF_HOME", "/tmp/huggingface")) / "hub")
        )
    return _model_cache[key]

async def transcribe_audio_local(file: UploadFile, model_size: str = "small") -> dict:
    """Transcribe audio file locally using faster_whisper"""
    if not file.content_type or not file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Only audio files allowed")

    temp_file = UPLOAD_DIR / f"{uuid.uuid4().hex}_{file.filename}"
    async with aiofiles.open(temp_file, 'wb') as f:
        while chunk := await file.read(8192):
            await f.write(chunk)

    try:
        model = get_model(model_size)
        segments, info = model.transcribe(str(temp_file), beam_size=5)
        full_text = "\n".join(seg.text.strip() for seg in segments)

        return {
            "text": full_text,
            "language": info.language,
            "raw": {
                "segments": [s._asdict() for s in segments],
                "language": info.language
            }
        }
    finally:
        temp_file.unlink(missing_ok=True)

# Updated evaluate endpoint: supports audio_transcribe question type with audio_url / audio_answer_path
@app.post("/api/qa/evaluate")
async def evaluate_question_and_answer(payload: dict):
    """

    Unified endpoint — accepts a question + student answer JSON and returns

    validation + grading + structured output. Supports all question types,

    including audio_transcribe (accepts audio_url or audio_answer_path and will call GROQ to transcribe).

    """
    try:
        question_data = payload.get("question")
        student_response = payload.get("student_answer")

        if not question_data or not student_response:
            raise HTTPException(status_code=400, detail="Missing 'question' or 'student_answer' field")

        # Backwards compatibility: allow "answer" key in question input
        if "answer" in question_data:
            question_data["correct_answer"] = question_data["answer"]

        # Validate structure via Pydantic
        try:
            question_obj = QuestionRequest(**question_data)
        except Exception as e:
            raise HTTPException(status_code=422, detail=f"Invalid question format: {str(e)}")

        # Decide how to obtain the student's textual answer
        student_answer_text = None
        transcription_info = None

        # If question type is audio_transcribe, accept audio_url or audio_answer_path and transcribe if needed
        if question_obj.question_type == QuestionType.audio_transcribe:
            # Prefer explicit provided 'answer' text if present
            provided_text = None
            if isinstance(student_response, dict):
                provided_text = student_response.get("answer") or student_response.get("transcribed_text")

            if provided_text and isinstance(provided_text, str) and provided_text.strip():
                student_answer_text = provided_text.strip()
                transcription_info = {"source": "provided_text", "audio_url": student_response.get("audio_url") or student_response.get("audio_answer_path")}
            else:
                # Look for audio URL fields
                audio_url = None
                if isinstance(student_response, dict):
                    audio_url = student_response.get("audio_url") or student_response.get("audio_answer_path") or student_response.get("audio_path")
                if not audio_url:
                    raise HTTPException(status_code=400, detail="audio_transcribe question requires 'audio_url' or 'audio_answer_path' in student_answer when no text 'answer' provided")

                # Transcribe using GROQ
                transcription = transcribe_audio_groq(audio_url)
                student_answer_text = transcription["text"]
                transcription_info = {
                    "source": "groq_transcription",
                    "audio_url": audio_url,
                    "language": transcription.get("language"),
                    "raw_response": transcription.get("raw")
                }

        else:
            # Non-audio types: accept 'answer' field directly. If student_response is just a primitive, try that.
            if isinstance(student_response, dict):
                # standard field name 'answer' or fallback to top-level fields
                if "answer" in student_response:
                    student_answer_text = student_response.get("answer")
                elif "student_answer" in student_response:
                    student_answer_text = student_response.get("student_answer")
                else:
                    # if the student_response itself is the answer object (for multi-part answers), pass it through
                    student_answer_text = student_response.get("answer", student_response)
            else:
                student_answer_text = student_response

        # Normalize a single primitive answer out of containers when possible
        # For grading, we pass raw value (string/list/dict/bool) as expected by grade_answer_with_ai
        student_answer_for_grading = student_answer_text

        # Step 1 — AI validation (use the question object as provided)
        ai_validation_raw = validate_question_with_ai(question_obj.model_dump())
        ai_validation = AIValidation(**ai_validation_raw)

        # Step 2 — AI grading (use question dict and extracted student answer)
        ai_grading_raw = grade_answer_with_ai(question_obj.model_dump(), student_answer_for_grading)
        ai_grading = AIGrading(**ai_grading_raw)

        # Step 3 — Structured response
        response = {
            "status": "success",
            "evaluation": {
                "validation": {
                    "is_valid": ai_validation.is_valid,
                    "issues": [] if ai_validation.is_valid else ["Detected issues in question content"],
                    "suggestions": [] if not ai_validation.suggestions else [ai_validation.suggestions]
                },
                "grading": {
                    "ai_score": ai_grading.ai_score,
                    "total_marks": ai_grading.total_marks,
                    "is_correct": ai_grading.is_correct,
                    "feedback": ai_grading.feedback
                }
            },
            "question_summary": {
                "type": question_obj.question_type,
                "text": question_obj.question,
                "reference_answer": question_obj.correct_answer,
                "metadata": question_obj.metadata.model_dump() if question_obj.metadata else None,
                "attachments": [att.model_dump() for att in (question_obj.attachments or [])]
            },
            "student_response": {
                # return original fields where possible and include transcription info if applicable
                "original_submission": student_response,
                "answer": student_answer_for_grading
            }
        }

        if transcription_info:
            response["student_response"]["transcription"] = transcription_info

        return response

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Evaluation error: {str(e)}")


@app.post("/api/qa/evaluate_local")
async def evaluate_question_and_answer_local(

    question: str = Form(..., description="Question JSON string"),

    audio_file: UploadFile = File(..., description="Audio file with student's answer"),

    model_size: str = Form("small", description="Whisper model size: tiny, base, small, medium, large")

):
    """

    Local evaluation endpoint using faster_whisper for transcription.

    Accepts form-data:

    - question: JSON string with question details (same format as /api/qa/evaluate)

    - audio_file: Audio file containing student's spoken answer

    - model_size: Optional whisper model size (default: small)

    

    Example curl:

    curl -X POST http://localhost:8000/api/qa/evaluate_local \

      -F 'question={"question_type":"audio_transcribe","question":"What is AI?","correct_answer":"Artificial Intelligence"}' \

      -F 'audio_file=@student_answer.mp3' \

      -F 'model_size=small'

    """
    try:
        # Parse question JSON
        try:
            question_data = json.loads(question)
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=400, detail=f"Invalid question JSON: {str(e)}")
        
        if not question_data:
            raise HTTPException(status_code=400, detail="Missing 'question' data")
        
        # Backwards compatibility: allow "answer" key in question input
        if "answer" in question_data:
            question_data["correct_answer"] = question_data["answer"]
        
        # Validate structure via Pydantic
        try:
            question_obj = QuestionRequest(**question_data)
        except Exception as e:
            raise HTTPException(status_code=422, detail=f"Invalid question format: {str(e)}")
        
        # Transcribe audio locally using faster_whisper
        transcription = await transcribe_audio_local(audio_file, model_size)
        student_answer_text = transcription["text"]
        
        transcription_info = {
            "source": "local_whisper",
            "model_size": model_size,
            "language": transcription.get("language"),
            "raw_response": transcription.get("raw")
        }
        
        # Step 1 — AI validation
        ai_validation_raw = validate_question_with_ai(question_obj.model_dump())
        ai_validation = AIValidation(**ai_validation_raw)
        
        # Step 2 — AI grading
        ai_grading_raw = grade_answer_with_ai(question_obj.model_dump(), student_answer_text)
        ai_grading = AIGrading(**ai_grading_raw)
        
        # Step 3 — Structured response
        response = {
            "status": "success",
            "evaluation": {
                "validation": {
                    "is_valid": ai_validation.is_valid,
                    "issues": [] if ai_validation.is_valid else ["Detected issues in question content"],
                    "suggestions": [] if not ai_validation.suggestions else [ai_validation.suggestions]
                },
                "grading": {
                    "ai_score": ai_grading.ai_score,
                    "total_marks": ai_grading.total_marks,
                    "is_correct": ai_grading.is_correct,
                    "feedback": ai_grading.feedback
                }
            },
            "question_summary": {
                "type": question_obj.question_type,
                "text": question_obj.question,
                "reference_answer": question_obj.correct_answer,
                "metadata": question_obj.metadata.model_dump() if question_obj.metadata else None,
                "attachments": [att.model_dump() for att in (question_obj.attachments or [])]
            },
            "student_response": {
                "answer": student_answer_text,
                "transcription": transcription_info
            }
        }
        
        return response
    
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Evaluation error: {str(e)}")


if __name__ == "__main__":
    import uvicorn
    print("=" * 60)
    print("Educational Question API Server v3.0")
    print("=" * 60)
    print(f"Gemini API Key: {'✓ Configured' if os.environ.get('GEMINI_API_KEY') else '✗ NOT SET'}")
    print(f"Model: gemini-flash-lite-latest")
    print(f"Server: http://localhost:8000")
    print(f"Docs: http://localhost:8000/docs")
    print(f"Endpoints: /api/qa/evaluate, /api/qa/evaluate_local")
    print("=" * 60)
    uvicorn.run(app, host="0.0.0.0", port=8000)