Lab6 / agent.py
Denysyk's picture
Update agent.py
122ae9c verified
"""
AI Examiner Agent — Groq with tool calling + fallback parser for leaked function calls.
"""
import json
import re
import uuid
from datetime import datetime
from openai import OpenAI
from exam_functions import (
start_exam, get_next_topic, end_exam, set_topic_queue, Message,
)
def _function_tool(name: str, description: str, properties: dict, required: list) -> dict:
    """Build one function-tool schema entry in the chat-completions format."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Tool schemas passed as the `tools` argument of the completion request.
TOOLS = [
    _function_tool(
        "start_exam",
        "Call once the student provided name and email. Returns list of exam topics.",
        {"email": {"type": "string"}, "name": {"type": "string"}},
        ["email", "name"],
    ),
    _function_tool(
        "get_next_topic",
        "Call to get the next exam topic. Returns empty string when no topics remain.",
        {},
        [],
    ),
    _function_tool(
        "end_exam",
        "Call after giving final feedback. Saves score (0-10) permanently.",
        {"email": {"type": "string"}, "score": {"type": "number"}},
        ["email", "score"],
    ),
]
# System instructions for the examiner model. The text is sent verbatim as the
# "system" message of every completion request, so any edit here changes the
# model's behaviour. The tool names it mentions (start_exam, get_next_topic,
# end_exam) correspond to the entries declared in TOOLS.
SYSTEM_PROMPT = """You are an AI university examiner conducting an NLP course oral exam.
RULES:
1. Greet the student and ask for their full name and email.
2. Once you have both, call start_exam(email, name).
- On error: ask to double-check details.
- On success: immediately call get_next_topic() to get the first topic.
3. For EACH topic, conduct a dialogue:
- Ask an open-ended question about the topic.
- Move to the NEXT QUESTION (not next topic) when:
a) The student gives a sufficiently complete answer — ask a follow-up to go deeper.
b) The student says "I don't know" or similar — acknowledge and ask a different/simpler question on the SAME topic.
c) It becomes clear the student's level won't change with more questions — then move to the next TOPIC.
- Move to the NEXT TOPIC (call get_next_topic()) when:
a) The student's knowledge on this topic is clearly established.
b) The student has said "I don't know" to 2+ questions in a row on this topic.
c) You have asked 3+ questions and have a clear picture of the student's level.
4. CRITICAL: Do NOT show the score or end the exam until get_next_topic() returns "". Cover ALL topics.
5. After all topics:
- Show the student their score (0-10) and feedback (strengths + what to improve).
- Call end_exam(email, score) with the EXACT numeric score you stated.
- Scoring guide:
* 9-10: Deep, accurate, detailed answers on all topics.
* 7-8: Good understanding, minor gaps.
* 5-6: Partial understanding, significant gaps.
* 3-4: Mostly "I don't know", very shallow answers.
* 0-2: No meaningful answers at all.
6. Be encouraging but STRICT and objective. "I don't know" lowers the score significantly.
7. Match the student's language (Ukrainian or English).
8. Never add meta-comments in parentheses. Speak naturally."""
def _extract_first_json(s: str) -> str:
"""Extract the first valid JSON object from a string."""
depth = 0
start = None
for i, c in enumerate(s):
if c == "{":
if start is None:
start = i
depth += 1
elif c == "}":
depth -= 1
if depth == 0 and start is not None:
candidate = s[start:i+1]
try:
json.loads(candidate)
return candidate
except (json.JSONDecodeError, ValueError):
start = None
return "{}"
def _parse_leaked_calls(text: str) -> list[tuple[str, str]]:
"""Parse <function=NAME...{json}...> in any format Llama might use."""
found = []
# Find function name, then extract first valid JSON after it
pattern = re.compile(r"<function=(\w+)[,\s(]*(\{)", re.DOTALL)
for m in pattern.finditer(text):
name = m.group(1)
json_start = text.index("{", m.start(2))
args_str = _extract_first_json(text[json_start:])
found.append((name, args_str))
return found
class ExaminerAgent:
    """Chat agent that runs an oral exam through an LLM with tool calling.

    The agent keeps two transcripts:

    * ``messages`` - the chat-completions message list sent to the model
      (user, assistant and tool messages).
    * ``history`` - a timestamped log of user messages, tool calls and final
      replies, which is handed to ``end_exam`` when the exam is saved.

    Tool calls that the model leaks as plain text (``<function=...>``) are
    recovered with ``_parse_leaked_calls`` and replayed as if they had been
    proper structured calls.
    """

    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    # Upper bound on model round-trips within one turn, so that a model which
    # keeps calling tools (or an API error that keeps repeating) cannot spin
    # forever.
    MAX_STEPS_PER_TURN = 20
    # Matches "<number> / 10" or "<number> out of 10" in free text.
    _SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*(?:out of|/)\s*10")

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """Create an agent talking to Groq's OpenAI-compatible endpoint.

        Args:
            api_key: API key passed to the client.
            model: Model identifier used for every completion request.
        """
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.groq.com/openai/v1",
        )
        self.model = model
        self.messages: list[dict] = []
        self.history: list[Message] = []
        self.student_email = ""
        self.exam_finished = False

    def _log(self, role: str, content: str):
        """Append a timestamped entry to the exam history."""
        self.history.append({
            "role": role,
            "content": content,
            "datetime": datetime.now().isoformat(timespec="seconds"),
        })

    @staticmethod
    def _parse_arguments(arguments_str: str) -> dict:
        """Decode tool-call arguments; anything that is not a JSON object yields {}."""
        if not arguments_str or arguments_str.strip() in ("null", "None", ""):
            return {}
        try:
            parsed = json.loads(arguments_str)
        except json.JSONDecodeError:
            return {}
        # The model may send a list, string or number; only objects are usable.
        return parsed if isinstance(parsed, dict) else {}

    @staticmethod
    def _coerce_score(value):
        """Return *value* as a float, or None when it is not a usable number."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        # NaN is the only float that differs from itself; treat it as missing.
        return None if number != number else number

    def _score_from_history(self):
        """Look for an "X/10" or "X out of 10" score in the last 10 history entries.

        Returns:
            The most recent such score as a float, or None if none is found.
        """
        for entry in reversed(self.history[-10:]):
            found = self._SCORE_RE.findall(entry.get("content", ""))
            if found:
                return float(found[-1])
        return None

    def _dispatch(self, name: str, arguments_str: str) -> str:
        """Execute one tool call and return its result as a JSON string.

        Args:
            name: Tool name requested by the model.
            arguments_str: Raw JSON arguments as produced by the model.

        Returns:
            A JSON document: the tool result, or ``{"error": ...}`` for an
            unknown tool, missing arguments, or a ``ValueError`` raised by
            ``start_exam``.
        """
        inputs = self._parse_arguments(arguments_str)
        self._log("tool_call", f"{name}({arguments_str})")

        if name == "start_exam":
            missing = [key for key in ("email", "name") if key not in inputs]
            if missing:
                # Report back to the model instead of crashing on a KeyError.
                return json.dumps(
                    {"error": "missing required argument(s): " + ", ".join(missing)}
                )
            try:
                topics = start_exam(inputs["email"], inputs["name"])
                set_topic_queue(topics)
                self.student_email = inputs["email"]
                return json.dumps({"topics": topics})
            except ValueError as e:
                return json.dumps({"error": str(e)})

        if name == "get_next_topic":
            return json.dumps({"topic": get_next_topic()})

        if name == "end_exam":
            score = self._coerce_score(inputs.get("score"))
            # A missing, invalid or zero score falls back to the score that
            # was stated in the recent conversation, if any.
            if not score:
                score = self._score_from_history()
            # Keep the stored value inside the documented 0-10 range.
            score = min(max(score or 0.0, 0.0), 10.0)
            if self.student_email:
                end_exam(self.student_email, float(score), self.history)
            self.exam_finished = True
            return json.dumps({"status": "saved"})

        return json.dumps({"error": "unknown tool"})

    def _inject_leaked(self, leaked: list[tuple[str, str]]):
        """Execute leaked tool calls and inject results into message history.

        Each call is recorded as a synthetic assistant ``tool_calls`` message
        followed by the matching ``tool`` message, using a generated call id.
        """
        for name, args_str in leaked:
            result = self._dispatch(name, args_str)
            fake_id = f"call_{uuid.uuid4().hex[:8]}"
            self.messages.append({
                "role": "assistant",
                "content": "",
                "tool_calls": [{
                    "id": fake_id,
                    "type": "function",
                    "function": {"name": name, "arguments": args_str},
                }],
            })
            self.messages.append(
                {"role": "tool", "tool_call_id": fake_id, "content": result}
            )

    @staticmethod
    def _strip_leaked_markup(text: str) -> str:
        """Drop everything from the first ``<function=`` onward and trim the rest."""
        return re.sub(r"<function=.*", "", text, flags=re.DOTALL).strip()

    def _run_turn(self) -> str:
        """Query the model until it produces a plain reply for the student.

        Structured tool calls are executed and their results fed back; tool
        calls leaked as text (in the reply or in an API error message) are
        recovered and replayed the same way.

        Returns:
            The assistant's final text for this turn.

        Raises:
            RuntimeError: If no final reply is produced within
                ``MAX_STEPS_PER_TURN`` model round-trips.
            Exception: Any API error that does not carry a leaked tool call is
                re-raised unchanged.
        """
        for _ in range(self.MAX_STEPS_PER_TURN):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    max_tokens=1024,
                    tools=TOOLS,
                    tool_choice="auto",
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        *self.messages,
                    ],
                )
            except Exception as e:
                # The provider may reject a malformed tool call and echo the
                # failed generation in the error text; recover the call from it.
                leaked = _parse_leaked_calls(str(e))
                if not leaked:
                    raise
                if self.messages and self.messages[-1]["role"] == "assistant":
                    bad = self.messages.pop()
                    clean = self._strip_leaked_markup(bad.get("content", ""))
                    if clean:
                        self.messages.append({"role": "assistant", "content": clean})
                self._inject_leaked(leaked)
                continue

            msg = response.choices[0].message
            text = msg.content or ""

            if msg.tool_calls:
                # Always answer structured tool calls, whatever finish_reason
                # says: an assistant message with tool_calls that is not
                # followed by tool results leaves the transcript invalid for
                # the next request.
                self.messages.append({
                    "role": "assistant",
                    "content": text,
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.function.name,
                                "arguments": tc.function.arguments,
                            },
                        }
                        for tc in msg.tool_calls
                    ],
                })
                for tc in msg.tool_calls:
                    result = self._dispatch(tc.function.name, tc.function.arguments)
                    self.messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": result,
                    })
                continue

            leaked = _parse_leaked_calls(text)
            if leaked:
                # Keep only the natural-language part of the reply, then
                # replay the leaked calls as proper tool messages.
                clean = self._strip_leaked_markup(text)
                if clean:
                    self.messages.append({"role": "assistant", "content": clean})
                self._inject_leaked(leaked)
                continue

            self.messages.append({"role": "assistant", "content": text})
            self._log("system", text)
            return text

        raise RuntimeError(
            f"no final reply after {self.MAX_STEPS_PER_TURN} model round-trips"
        )

    def start(self) -> str:
        """Reset all state and the topic queue, then return the model's opening message."""
        self.history = []
        self.student_email = ""
        self.exam_finished = False
        set_topic_queue([])
        self.messages = [{"role": "user", "content": "Hello, I am ready for my exam."}]
        return self._run_turn()

    def chat(self, user_message: str) -> str:
        """Log and send one student message; return the examiner's reply."""
        self._log("user", user_message)
        self.messages.append({"role": "user", "content": user_message})
        return self._run_turn()