""" AI Examiner Agent — Groq with tool calling + fallback parser for leaked function calls. """ import json import re import uuid from datetime import datetime from openai import OpenAI from exam_functions import ( start_exam, get_next_topic, end_exam, set_topic_queue, Message, ) TOOLS = [ { "type": "function", "function": { "name": "start_exam", "description": "Call once the student provided name and email. Returns list of exam topics.", "parameters": { "type": "object", "properties": { "email": {"type": "string"}, "name": {"type": "string"}, }, "required": ["email", "name"], }, }, }, { "type": "function", "function": { "name": "get_next_topic", "description": "Call to get the next exam topic. Returns empty string when no topics remain.", "parameters": {"type": "object", "properties": {}, "required": []}, }, }, { "type": "function", "function": { "name": "end_exam", "description": "Call after giving final feedback. Saves score (0-10) permanently.", "parameters": { "type": "object", "properties": { "email": {"type": "string"}, "score": {"type": "number"}, }, "required": ["email", "score"], }, }, }, ] SYSTEM_PROMPT = """You are an AI university examiner conducting an NLP course oral exam. RULES: 1. Greet the student and ask for their full name and email. 2. Once you have both, call start_exam(email, name). - On error: ask to double-check details. - On success: immediately call get_next_topic() to get the first topic. 3. For EACH topic, conduct a dialogue: - Ask an open-ended question about the topic. - Move to the NEXT QUESTION (not next topic) when: a) The student gives a sufficiently complete answer — ask a follow-up to go deeper. b) The student says "I don't know" or similar — acknowledge and ask a different/simpler question on the SAME topic. c) It becomes clear the student's level won't change with more questions — then move to the next TOPIC. - Move to the NEXT TOPIC (call get_next_topic()) when: a) The student's knowledge on this topic is clearly established. b) The student has said "I don't know" to 2+ questions in a row on this topic. c) You have asked 3+ questions and have a clear picture of the student's level. 4. CRITICAL: Do NOT show the score or end the exam until get_next_topic() returns "". Cover ALL topics. 5. After all topics: - Show the student their score (0-10) and feedback (strengths + what to improve). - Call end_exam(email, score) with the EXACT numeric score you stated. - Scoring guide: * 9-10: Deep, accurate, detailed answers on all topics. * 7-8: Good understanding, minor gaps. * 5-6: Partial understanding, significant gaps. * 3-4: Mostly "I don't know", very shallow answers. * 0-2: No meaningful answers at all. 6. Be encouraging but STRICT and objective. "I don't know" lowers the score significantly. 7. Match the student's language (Ukrainian or English). 8. Never add meta-comments in parentheses. Speak naturally.""" def _extract_first_json(s: str) -> str: """Extract the first valid JSON object from a string.""" depth = 0 start = None for i, c in enumerate(s): if c == "{": if start is None: start = i depth += 1 elif c == "}": depth -= 1 if depth == 0 and start is not None: candidate = s[start:i+1] try: json.loads(candidate) return candidate except (json.JSONDecodeError, ValueError): start = None return "{}" def _parse_leaked_calls(text: str) -> list[tuple[str, str]]: """Parse in any format Llama might use.""" found = [] # Find function name, then extract first valid JSON after it pattern = re.compile(r" str: try: inputs = json.loads(arguments_str) if arguments_str and arguments_str.strip() not in ("null", "None", "") else {} except json.JSONDecodeError: inputs = {} self._log("tool_call", f"{name}({arguments_str})") if name == "start_exam": try: topics = start_exam(inputs["email"], inputs["name"]) set_topic_queue(topics) self.student_email = inputs["email"] return json.dumps({"topics": topics}) except ValueError as e: return json.dumps({"error": str(e)}) elif name == "get_next_topic": return json.dumps({"topic": get_next_topic()}) elif name == "end_exam": score = inputs.get("score", None) # Fallback: extract score from chat history if missing or zero if not score: for entry in reversed(self.history[-10:]): found = re.findall(r"([0-9]+(?:\.[0-9]+)?)\s*(?:out of|/)\s*10", entry.get("content", "")) if found: score = float(found[-1]) break if not score: score = 0.0 if self.student_email: end_exam(self.student_email, float(score), self.history) self.exam_finished = True return json.dumps({"status": "saved"}) return json.dumps({"error": "unknown tool"}) def _inject_leaked(self, leaked: list[tuple[str, str]]): """Execute leaked tool calls and inject results into message history.""" for name, args_str in leaked: result = self._dispatch(name, args_str) fake_id = f"call_{uuid.uuid4().hex[:8]}" self.messages.append({ "role": "assistant", "content": "", "tool_calls": [{"id": fake_id, "type": "function", "function": {"name": name, "arguments": args_str}}], }) self.messages.append({"role": "tool", "tool_call_id": fake_id, "content": result}) def _run_turn(self) -> str: while True: try: response = self.client.chat.completions.create( model="llama-3.3-70b-versatile", max_tokens=1024, tools=TOOLS, tool_choice="auto", messages=[ {"role": "system", "content": SYSTEM_PROMPT}, *self.messages, ], ) except Exception as e: leaked = _parse_leaked_calls(str(e)) if leaked: if self.messages and self.messages[-1]["role"] == "assistant": bad = self.messages.pop() clean = re.sub(r" str: self.messages = [] self.history = [] self.student_email = "" self.exam_finished = False set_topic_queue([]) self.messages = [{"role": "user", "content": "Hello, I am ready for my exam."}] return self._run_turn() def chat(self, user_message: str) -> str: self._log("user", user_message) self.messages.append({"role": "user", "content": user_message}) return self._run_turn()