| """ |
| AI Examiner Agent — Groq with tool calling + fallback parser for leaked function calls. |
| """ |
|
|
| import json |
| import re |
| import uuid |
| from datetime import datetime |
| from openai import OpenAI |
|
|
| from exam_functions import ( |
| start_exam, get_next_topic, end_exam, set_topic_queue, Message, |
| ) |
|
|
def _function_tool(tool_name, description, /, **param_types):
    """Build one function-tool schema entry in chat-completions format.

    Each keyword argument declares a parameter of the tool: the keyword is
    the parameter name and its value is the JSON-schema type name. All
    declared parameters are marked as required, in declaration order.
    """
    properties = {param: {"type": kind} for param, kind in param_types.items()}
    return {
        "type": "function",
        "function": {
            "name": tool_name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": list(properties),
            },
        },
    }


# Tool schemas passed as ``tools=`` on every chat-completion request.
TOOLS = [
    _function_tool(
        "start_exam",
        "Call once the student provided name and email. Returns list of exam topics.",
        email="string",
        name="string",
    ),
    _function_tool(
        "get_next_topic",
        "Call to get the next exam topic. Returns empty string when no topics remain.",
    ),
    _function_tool(
        "end_exam",
        "Call after giving final feedback. Saves score (0-10) permanently.",
        email="string",
        score="number",
    ),
]
|
|
# System prompt sent as the first message of every chat-completion request.
# It defines the examiner protocol: collect name and email, call start_exam,
# question the student topic by topic via get_next_topic, then state a 0-10
# score and persist it with end_exam.
SYSTEM_PROMPT = """You are an AI university examiner conducting an NLP course oral exam.

RULES:
1. Greet the student and ask for their full name and email.
2. Once you have both, call start_exam(email, name).
- On error: ask to double-check details.
- On success: immediately call get_next_topic() to get the first topic.

3. For EACH topic, conduct a dialogue:
- Ask an open-ended question about the topic.
- Move to the NEXT QUESTION (not next topic) when:
a) The student gives a sufficiently complete answer — ask a follow-up to go deeper.
b) The student says "I don't know" or similar — acknowledge and ask a different/simpler question on the SAME topic.
c) It becomes clear the student's level won't change with more questions — then move to the next TOPIC.
- Move to the NEXT TOPIC (call get_next_topic()) when:
a) The student's knowledge on this topic is clearly established.
b) The student has said "I don't know" to 2+ questions in a row on this topic.
c) You have asked 3+ questions and have a clear picture of the student's level.

4. CRITICAL: Do NOT show the score or end the exam until get_next_topic() returns "". Cover ALL topics.

5. After all topics:
- Show the student their score (0-10) and feedback (strengths + what to improve).
- Call end_exam(email, score) with the EXACT numeric score you stated.
- Scoring guide:
* 9-10: Deep, accurate, detailed answers on all topics.
* 7-8: Good understanding, minor gaps.
* 5-6: Partial understanding, significant gaps.
* 3-4: Mostly "I don't know", very shallow answers.
* 0-2: No meaningful answers at all.

6. Be encouraging but STRICT and objective. "I don't know" lowers the score significantly.
7. Match the student's language (Ukrainian or English).
8. Never add meta-comments in parentheses. Speak naturally."""
|
|
|
|
def _extract_first_json(s: str) -> str:
    """Return the text of the first valid JSON object found in *s*.

    Each ``{`` in *s* is tried, left to right, as the possible start of an
    object, and the standard JSON decoder decides where that object ends.
    Relying on the decoder (rather than counting braces by hand) keeps the
    scan correct when string values contain ``{`` or ``}``, when a stray
    ``}`` precedes the object, and when a valid object is nested inside
    surrounding text that merely looks like JSON.

    Args:
        s: Arbitrary text that may contain a JSON object.

    Returns:
        The exact substring of *s* that holds the first parseable JSON
        object, or the literal ``"{}"`` when no object can be parsed.
    """
    decoder = json.JSONDecoder()
    pos = s.find("{")
    while pos != -1:
        try:
            # raw_decode parses one document starting at ``pos`` and reports
            # the index just past it, ignoring whatever text follows.
            _, end = decoder.raw_decode(s, pos)
        except ValueError:  # json.JSONDecodeError subclasses ValueError
            pos = s.find("{", pos + 1)
        else:
            return s[pos:end]
    return "{}"
|
|
|
|
# Start of a leaked call: "<function=NAME", then any mix of the separators a
# model may put before the arguments (">", ",", "(", whitespace), then
# optionally the opening brace of the JSON arguments.
_LEAKED_CALL_RE = re.compile(r"<function=(\w+)[>,\s(]*(\{)?")


def _parse_leaked_calls(text: str) -> list[tuple[str, str]]:
    """Find tool calls that the model emitted as plain ``<function=...>`` text.

    Recognised shapes include ``<function=NAME>{json}</function>``,
    ``<function=NAME{json}``, ``<function=NAME,{json}``,
    ``<function=NAME({json})`` and the argument-less
    ``<function=NAME></function>``.

    Args:
        text: Model output or an API error message that may embed calls.

    Returns:
        One ``(name, arguments_json)`` tuple per call, in order of
        appearance. ``arguments_json`` is ``"{}"`` when the call carries no
        JSON object or the object cannot be parsed.
    """
    calls = []
    for match in _LEAKED_CALL_RE.finditer(text):
        if match.group(2) is None:
            # No opening brace right after the name: a call without arguments.
            args_str = "{}"
        else:
            args_str = _extract_first_json(text[match.start(2):])
        calls.append((match.group(1), args_str))
    return calls
|
|
|
|
class ExaminerAgent:
    """Conduct an oral exam by chatting with a Groq-hosted Llama model.

    Two parallel records are kept. ``messages`` is the conversation in
    chat-completions wire format and is re-sent on every request.
    ``history`` is a timestamped log of user messages, tool calls and final
    replies; it is handed to ``end_exam`` when the score is saved.

    Tool calls requested by the model are executed locally by ``_dispatch``.
    Calls that the model leaked as ``<function=...>`` text (in a reply or in
    an API error message) are recovered with ``_parse_leaked_calls`` and
    replayed as if they had been proper tool calls.
    """

    # Upper bound on model round-trips within one turn, so that a model that
    # keeps calling tools (or an API that keeps failing with a leaked call)
    # cannot spin forever.
    MAX_ROUNDS_PER_TURN = 16

    # "<number> out of 10" / "<number>/10" as stated in feedback text.
    _SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*(?:out of|/)\s*10")

    def __init__(self, api_key: str):
        """Create a client for Groq's OpenAI-compatible endpoint.

        Args:
            api_key: Groq API key.
        """
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.groq.com/openai/v1",
        )
        self.messages: list[dict] = []
        self.history: list[Message] = []
        self.student_email = ""
        self.exam_finished = False

    def _log(self, role: str, content: str):
        """Append a timestamped entry to the exam history."""
        self.history.append({
            "role": role,
            "content": content,
            "datetime": datetime.now().isoformat(timespec="seconds"),
        })

    @staticmethod
    def _strip_leaked(text: str) -> str:
        """Drop everything from the first ``<function=`` marker onwards."""
        return re.sub(r"<function=.*", "", text, flags=re.DOTALL).strip()

    @staticmethod
    def _coerce_score(raw):
        """Return *raw* as a float, or ``None`` if it is absent or non-numeric."""
        if raw is None:
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    def _score_from_history(self):
        """Return the most recent "N out of 10" / "N/10" score in the last
        ten history entries, or ``None`` if there is none."""
        for entry in reversed(self.history[-10:]):
            found = self._SCORE_RE.findall(entry.get("content", ""))
            if found:
                return float(found[-1])
        return None

    def _dispatch(self, name: str, arguments_str: str) -> str:
        """Execute one tool call and return its result as a JSON string.

        Args:
            name: Tool name requested by the model.
            arguments_str: Raw JSON arguments; anything that is not a JSON
                object (empty, ``null``, malformed, a list, ...) is treated
                as "no arguments".

        Returns:
            A JSON document: the tool's payload on success, or
            ``{"error": ...}`` when the tool is unknown, arguments are
            missing, ``start_exam`` rejects the details, or ``end_exam`` is
            called before an exam was started.
        """
        try:
            parsed = json.loads(arguments_str) if arguments_str else {}
        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
            parsed = {}
        inputs = parsed if isinstance(parsed, dict) else {}

        self._log("tool_call", f"{name}({arguments_str})")

        if name == "start_exam":
            missing = [key for key in ("email", "name") if key not in inputs]
            if missing:
                # Report back to the model instead of failing with KeyError.
                return json.dumps(
                    {"error": "missing required argument(s): " + ", ".join(missing)}
                )
            try:
                topics = start_exam(inputs["email"], inputs["name"])
                set_topic_queue(topics)
                self.student_email = inputs["email"]
                return json.dumps({"topics": topics})
            except ValueError as e:
                return json.dumps({"error": str(e)})

        if name == "get_next_topic":
            return json.dumps({"topic": get_next_topic()})

        if name == "end_exam":
            if not self.student_email:
                # Nothing can be saved without a started exam; do not claim
                # success and do not mark the exam as finished.
                return json.dumps(
                    {"error": "no exam in progress; call start_exam first"}
                )
            # An explicit 0 is a valid score; only a missing or non-numeric
            # value falls back to the score stated in recent messages.
            score = self._coerce_score(inputs.get("score"))
            if score is None:
                score = self._score_from_history()
            if score is None:
                score = 0.0
            end_exam(self.student_email, score, self.history)
            self.exam_finished = True
            return json.dumps({"status": "saved"})

        return json.dumps({"error": "unknown tool"})

    def _inject_leaked(self, leaked: list[tuple[str, str]]):
        """Execute leaked tool calls and inject results into message history.

        Each call is recorded as an assistant ``tool_calls`` message with a
        generated id, followed by the matching ``tool`` result message.
        """
        for name, args_str in leaked:
            result = self._dispatch(name, args_str)
            fake_id = f"call_{uuid.uuid4().hex[:8]}"
            self.messages.append({
                "role": "assistant",
                "content": "",
                "tool_calls": [{
                    "id": fake_id,
                    "type": "function",
                    "function": {"name": name, "arguments": args_str},
                }],
            })
            self.messages.append(
                {"role": "tool", "tool_call_id": fake_id, "content": result}
            )

    def _run_turn(self) -> str:
        """Query the model until it produces a plain-text reply.

        Tool calls (proper or leaked) are executed and their results fed
        back to the model before asking again.

        Returns:
            The assistant's reply text for this turn.

        Raises:
            RuntimeError: If no plain-text reply is produced within
                ``MAX_ROUNDS_PER_TURN`` round-trips.
            Exception: Any API error that does not contain a leaked call is
                re-raised unchanged.
        """
        for _ in range(self.MAX_ROUNDS_PER_TURN):
            try:
                response = self.client.chat.completions.create(
                    model="llama-3.3-70b-versatile",
                    max_tokens=1024,
                    tools=TOOLS,
                    tool_choice="auto",
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        *self.messages,
                    ],
                )
            except Exception as e:
                # The API may reject a malformed tool call and echo it in the
                # error text; recover the call from there if possible.
                leaked = _parse_leaked_calls(str(e))
                if not leaked:
                    raise
                if self.messages and self.messages[-1]["role"] == "assistant":
                    bad = self.messages.pop()
                    clean = self._strip_leaked(bad.get("content") or "")
                    if clean:
                        self.messages.append({"role": "assistant", "content": clean})
                self._inject_leaked(leaked)
                continue

            msg = response.choices[0].message
            text = msg.content or ""

            # Decide on the presence of tool calls alone: if the message
            # carries tool calls they must be answered with tool results
            # whatever finish_reason says, otherwise the stored conversation
            # would contain unanswered tool calls.
            if msg.tool_calls:
                self.messages.append({
                    "role": "assistant",
                    "content": text,
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.function.name,
                                "arguments": tc.function.arguments,
                            },
                        }
                        for tc in msg.tool_calls
                    ],
                })
                for tc in msg.tool_calls:
                    result = self._dispatch(tc.function.name, tc.function.arguments)
                    self.messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": result,
                    })
                continue

            leaked = _parse_leaked_calls(text)
            if leaked:
                # Keep only the prose before the leaked call, then replay the
                # call as a regular tool call.
                clean = self._strip_leaked(text)
                if clean:
                    self.messages.append({"role": "assistant", "content": clean})
                self._inject_leaked(leaked)
                continue

            self.messages.append({"role": "assistant", "content": text})
            self._log("system", text)
            return text

        raise RuntimeError(
            f"model produced no reply within {self.MAX_ROUNDS_PER_TURN} rounds"
        )

    def start(self) -> str:
        """Reset all state and open a new exam conversation.

        Returns:
            The model's first reply.
        """
        self.history = []
        self.student_email = ""
        self.exam_finished = False
        set_topic_queue([])
        self.messages = [{"role": "user", "content": "Hello, I am ready for my exam."}]
        return self._run_turn()

    def chat(self, user_message: str) -> str:
        """Log and send one student message and return the model's reply."""
        self._log("user", user_message)
        self.messages.append({"role": "user", "content": user_message})
        return self._run_turn()