Spaces:

Denysyk
/

Lab6

Sleeping

File size: 10,248 Bytes

"""
AI Examiner Agent — Groq with tool calling + fallback parser for leaked function calls.
"""

import json
import re
import uuid
from datetime import datetime
from openai import OpenAI

from exam_functions import (
    start_exam, get_next_topic, end_exam, set_topic_queue, Message,
)

# Tool (function-calling) schemas passed as ``tools=`` on every chat
# completion request. Each "name" must match a branch in
# ``ExaminerAgent._dispatch``; the "description" strings are read by the model
# and therefore shape when it decides to call each tool.
TOOLS = [
    # Registers the student; dispatched to ``start_exam(email, name)``.
    {
        "type": "function",
        "function": {
            "name": "start_exam",
            "description": "Call once the student provided name and email. Returns list of exam topics.",
            "parameters": {
                "type": "object",
                "properties": {
                    "email": {"type": "string"},
                    "name":  {"type": "string"},
                },
                "required": ["email", "name"],
            },
        },
    },
    # Takes no arguments; dispatched to ``get_next_topic()``.
    {
        "type": "function",
        "function": {
            "name": "get_next_topic",
            "description": "Call to get the next exam topic. Returns empty string when no topics remain.",
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
    # Persists the final score; dispatched to ``end_exam(...)``.
    {
        "type": "function",
        "function": {
            "name": "end_exam",
            "description": "Call after giving final feedback. Saves score (0-10) permanently.",
            "parameters": {
                "type": "object",
                "properties": {
                    "email": {"type": "string"},
                    "score": {"type": "number"},
                },
                "required": ["email", "score"],
            },
        },
    },
]

# System prompt prepended to the transcript on every completion request (see
# ``ExaminerAgent._run_turn``). It is runtime text read by the model: the tool
# names it mentions (start_exam, get_next_topic, end_exam) must stay in sync
# with the entries in ``TOOLS``.
SYSTEM_PROMPT = """You are an AI university examiner conducting an NLP course oral exam.

RULES:
1. Greet the student and ask for their full name and email.
2. Once you have both, call start_exam(email, name).
   - On error: ask to double-check details.
   - On success: immediately call get_next_topic() to get the first topic.

3. For EACH topic, conduct a dialogue:
   - Ask an open-ended question about the topic.
   - Move to the NEXT QUESTION (not next topic) when:
     a) The student gives a sufficiently complete answer — ask a follow-up to go deeper.
     b) The student says "I don't know" or similar — acknowledge and ask a different/simpler question on the SAME topic.
     c) It becomes clear the student's level won't change with more questions — then move to the next TOPIC.
   - Move to the NEXT TOPIC (call get_next_topic()) when:
     a) The student's knowledge on this topic is clearly established.
     b) The student has said "I don't know" to 2+ questions in a row on this topic.
     c) You have asked 3+ questions and have a clear picture of the student's level.

4. CRITICAL: Do NOT show the score or end the exam until get_next_topic() returns "". Cover ALL topics.

5. After all topics:
   - Show the student their score (0-10) and feedback (strengths + what to improve).
   - Call end_exam(email, score) with the EXACT numeric score you stated.
   - Scoring guide:
     * 9-10: Deep, accurate, detailed answers on all topics.
     * 7-8: Good understanding, minor gaps.
     * 5-6: Partial understanding, significant gaps.
     * 3-4: Mostly "I don't know", very shallow answers.
     * 0-2: No meaningful answers at all.

6. Be encouraging but STRICT and objective. "I don't know" lowers the score significantly.
7. Match the student's language (Ukrainian or English).
8. Never add meta-comments in parentheses. Speak naturally."""


def _extract_first_json(s: str) -> str:
    """Return the first substring of *s* that parses as a JSON object.

    Every ``{`` in *s* is tried, left to right, as the start of an object and
    handed to the real JSON parser. Unlike plain brace counting this copes
    with braces inside string values, stray ``}`` characters before the
    object, and a valid object nested inside surrounding garbage.

    Args:
        s: Arbitrary text that may contain a JSON object somewhere inside it.

    Returns:
        The exact source text of the first parseable object, or ``"{}"`` when
        no object can be parsed.
    """
    decoder = json.JSONDecoder()
    pos = s.find("{")
    while pos != -1:
        try:
            # raw_decode parses one JSON value starting at `pos` and reports
            # where it ended, ignoring whatever text follows.
            _, end = decoder.raw_decode(s, pos)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            pos = s.find("{", pos + 1)
            continue
        return s[pos:end]
    return "{}"


def _parse_leaked_calls(text: str) -> list[tuple[str, str]]:
    """Find tool calls the model wrote as plain text instead of structured calls.

    Recognises ``<function=NAME`` followed by optional separators (comma,
    whitespace, ``(`` or ``>``) and then a JSON object, which covers e.g.
    ``<function=f{...}``, ``<function=f({...})`` and ``<function=f>{...}``.

    Args:
        text: Model output or error text that may contain leaked calls.

    Returns:
        A list of ``(function_name, json_arguments)`` pairs in order of
        appearance. ``json_arguments`` is whatever ``_extract_first_json``
        returns for the text starting at the opening brace, so it is ``"{}"``
        when nothing parseable follows.
    """
    found = []
    # `>` is accepted as a separator so the `<function=NAME>{...}</function>`
    # form is matched too; the brace is captured to locate the JSON start.
    pattern = re.compile(r"<function=(\w+)[,\s(>]*(\{)", re.DOTALL)
    for m in pattern.finditer(text):
        name = m.group(1)
        args_str = _extract_first_json(text[m.start(2):])
        found.append((name, args_str))
    return found


class ExaminerAgent:
    """Conduct an oral exam by looping between the student, the LLM and the exam tools.

    Two parallel records are kept:

    * ``messages`` - the chat transcript sent to the model on every request
      (the system prompt is prepended at request time and not stored here).
    * ``history`` - a timestamped log of user messages, tool calls and final
      replies; it is handed to ``end_exam`` when the score is saved.

    Public entry points are ``start()`` and ``chat()``; both return the
    examiner's next reply as text.
    """

    # Upper bound on model round-trips inside one student turn, so a model
    # that keeps calling (or leaking) tools cannot spin forever.
    _MAX_ROUNDS_PER_TURN = 25
    # Everything from the first leaked "<function=" marker to the end of text.
    _LEAK_TAIL_RE = re.compile(r"<function=.*", re.DOTALL)
    # A score written in prose, e.g. "7.5 out of 10" or "8/10".
    _SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*(?:out of|/)\s*10")

    def __init__(self, api_key: str):
        """Create the API client (pointed at Groq's OpenAI-compatible endpoint).

        Args:
            api_key: API key passed to the ``OpenAI`` client.
        """
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.groq.com/openai/v1",
        )
        self.messages: list[dict] = []
        self.history: list[Message] = []
        self.student_email = ""
        self.exam_finished = False

    def _log(self, role: str, content: str):
        """Append a timestamped entry to ``history``."""
        self.history.append({
            "role": role, "content": content,
            "datetime": datetime.now().isoformat(timespec="seconds"),
        })

    @staticmethod
    def _parse_arguments(arguments_str) -> dict:
        """Decode a tool-call argument string into a dict, or ``{}`` if unusable."""
        if not arguments_str or arguments_str.strip() in ("null", "None", ""):
            return {}
        try:
            parsed = json.loads(arguments_str)
        except json.JSONDecodeError:
            return {}
        # Valid JSON that is not an object (list, number, ...) carries no
        # named arguments, so treat it the same as "no arguments".
        return parsed if isinstance(parsed, dict) else {}

    def _dispatch(self, name: str, arguments_str: str) -> str:
        """Execute one tool call and return its result as a JSON string.

        Args:
            name: Tool name as requested by the model.
            arguments_str: Raw JSON argument string from the model; missing,
                malformed or non-object input is treated as no arguments.

        Returns:
            A JSON document: the tool's payload on success, or
            ``{"error": ...}`` when the tool is unknown, required arguments
            are missing, or the tool rejected the input.
        """
        inputs = self._parse_arguments(arguments_str)
        self._log("tool_call", f"{name}({arguments_str})")

        if name == "start_exam":
            return self._tool_start_exam(inputs)
        if name == "get_next_topic":
            return json.dumps({"topic": get_next_topic()})
        if name == "end_exam":
            return self._tool_end_exam(inputs)
        return json.dumps({"error": "unknown tool"})

    def _tool_start_exam(self, inputs: dict) -> str:
        """Run ``start_exam`` and queue the returned topics."""
        # Report missing arguments to the model instead of raising KeyError,
        # so it can ask the student again.
        if "email" not in inputs or "name" not in inputs:
            return json.dumps({"error": "start_exam requires both 'email' and 'name'"})
        try:
            topics = start_exam(inputs["email"], inputs["name"])
            set_topic_queue(topics)
            self.student_email = inputs["email"]
            return json.dumps({"topics": topics})
        except ValueError as exc:
            return json.dumps({"error": str(exc)})

    def _tool_end_exam(self, inputs: dict) -> str:
        """Persist the final score via ``end_exam``.

        The exam is saved under the email recorded by ``start_exam``, not the
        one supplied in the tool arguments.
        """
        try:
            score = float(inputs.get("score") or 0.0)
        except (TypeError, ValueError):
            score = 0.0  # non-numeric score: treat as missing
        # Fallback: a missing or zero score is recovered from recent chat
        # history, where the examiner should have stated it in prose.
        if not score:
            score = self._score_from_history()
        if not self.student_email:
            # Nothing can be saved without a started exam; say so rather than
            # reporting a save that never happened.
            return json.dumps({"error": "no exam in progress; call start_exam first"})
        end_exam(self.student_email, score, self.history)
        self.exam_finished = True
        return json.dumps({"status": "saved"})

    def _score_from_history(self) -> float:
        """Return the latest "N out of 10" / "N/10" score in the last 10 log entries, else 0.0."""
        for entry in reversed(self.history[-10:]):
            matches = self._SCORE_RE.findall(entry.get("content", ""))
            if matches:
                return float(matches[-1])
        return 0.0

    def _inject_leaked(self, leaked: list[tuple[str, str]]):
        """Execute leaked tool calls and inject results into message history.

        Each call is recorded as a synthetic assistant ``tool_calls`` message
        followed by the matching ``tool`` result, so the transcript looks as
        if the model had made a structured call.
        """
        for name, args_str in leaked:
            result = self._dispatch(name, args_str)
            fake_id = f"call_{uuid.uuid4().hex[:8]}"
            self.messages.append({
                "role": "assistant",
                "content": "",
                "tool_calls": [{"id": fake_id, "type": "function",
                                "function": {"name": name, "arguments": args_str}}],
            })
            self.messages.append({"role": "tool", "tool_call_id": fake_id, "content": result})

    def _append_visible_text(self, text: str):
        """Store the part of *text* before any leaked call as an assistant message."""
        clean = self._LEAK_TAIL_RE.sub("", text or "").strip()
        if clean:
            self.messages.append({"role": "assistant", "content": clean})

    def _run_turn(self) -> str:
        """Query the model until it produces a plain-text reply.

        Structured tool calls are executed and their results appended to the
        transcript; tool calls leaked as text (in a reply or in an API error
        message) are parsed, executed and injected the same way.

        Returns:
            The assistant's reply text for the student.

        Raises:
            RuntimeError: If no plain reply is produced within
                ``_MAX_ROUNDS_PER_TURN`` requests.
            Exception: Any API error that does not contain a leaked call is
                re-raised unchanged.
        """
        for _ in range(self._MAX_ROUNDS_PER_TURN):
            try:
                response = self.client.chat.completions.create(
                    model="llama-3.3-70b-versatile",
                    max_tokens=1024,
                    tools=TOOLS,
                    tool_choice="auto",
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        *self.messages,
                    ],
                )
            except Exception as exc:
                # Presumably the provider rejects a malformed tool call and
                # echoes the failed generation in the error text; recover the
                # call from there. Anything else is a genuine failure.
                leaked = _parse_leaked_calls(str(exc))
                if not leaked:
                    raise
                if self.messages and self.messages[-1]["role"] == "assistant":
                    bad = self.messages.pop()
                    self._append_visible_text(bad.get("content", ""))
                self._inject_leaked(leaked)
                continue

            msg = response.choices[0].message
            text = msg.content or ""

            if msg.tool_calls:
                # Always answer every recorded tool call, whatever the finish
                # reason: an assistant message with unanswered tool_calls
                # would leave the transcript in an inconsistent state.
                self.messages.append({
                    "role": "assistant",
                    "content": text,
                    "tool_calls": [
                        {"id": tc.id, "type": "function",
                         "function": {"name": tc.function.name,
                                      "arguments": tc.function.arguments}}
                        for tc in msg.tool_calls
                    ],
                })
                for tc in msg.tool_calls:
                    result = self._dispatch(tc.function.name, tc.function.arguments)
                    self.messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": result,
                    })
                continue

            leaked = _parse_leaked_calls(text)
            if leaked:
                # Keep only the text before the leak, then replay the calls.
                self._append_visible_text(text)
                self._inject_leaked(leaked)
                continue

            self.messages.append({"role": "assistant", "content": text})
            self._log("system", text)
            return text

        raise RuntimeError(
            f"Model produced no reply within {self._MAX_ROUNDS_PER_TURN} tool-call rounds"
        )

    def start(self) -> str:
        """Reset all exam state and return the examiner's opening message."""
        self.history = []
        self.student_email = ""
        self.exam_finished = False
        set_topic_queue([])
        # A synthetic first user message prompts the model to open the exam.
        self.messages = [{"role": "user", "content": "Hello, I am ready for my exam."}]
        return self._run_turn()

    def chat(self, user_message: str) -> str:
        """Log and send one student message, returning the examiner's reply."""
        self._log("user", user_message)
        self.messages.append({"role": "user", "content": user_message})
        return self._run_turn()