Spaces:

Denysyk
/

Lab6

Sleeping

App Files Files Community

Lab6 / agent.py

Denysyk

Update agent.py

122ae9c verified 7 days ago

raw

history blame contribute delete

10.2 kB

	"""
	AI Examiner Agent — Groq with tool calling + fallback parser for leaked function calls.
	"""

	import json
	import re
	import uuid
	from datetime import datetime
	from openai import OpenAI

	from exam_functions import (
	start_exam, get_next_topic, end_exam, set_topic_queue, Message,
	)

	def _function_tool(name, description, properties=None, required=()):
	    """Build one function-tool schema entry in the chat-completions format.

	    ``properties`` maps argument names to their JSON-schema fragments and
	    ``required`` lists the mandatory argument names; both default to empty
	    for tools that take no arguments.
	    """
	    return {
	        "type": "function",
	        "function": {
	            "name": name,
	            "description": description,
	            "parameters": {
	                "type": "object",
	                "properties": dict(properties or {}),
	                "required": list(required),
	            },
	        },
	    }


	# Tool schemas advertised to the model: one entry per exam function.
	TOOLS = [
	    _function_tool(
	        "start_exam",
	        "Call once the student provided name and email. Returns list of exam topics.",
	        {"email": {"type": "string"}, "name": {"type": "string"}},
	        ["email", "name"],
	    ),
	    _function_tool(
	        "get_next_topic",
	        "Call to get the next exam topic. Returns empty string when no topics remain.",
	    ),
	    _function_tool(
	        "end_exam",
	        "Call after giving final feedback. Saves score (0-10) permanently.",
	        {"email": {"type": "string"}, "score": {"type": "number"}},
	        ["email", "score"],
	    ),
	]

	# Instruction text sent as the "system" message at the head of every
	# chat-completion request. It refers to the three tools by the same names
	# they are declared under in TOOLS (start_exam, get_next_topic, end_exam),
	# so renaming a tool requires updating this text as well.
	SYSTEM_PROMPT = """You are an AI university examiner conducting an NLP course oral exam.

	RULES:
	1. Greet the student and ask for their full name and email.
	2. Once you have both, call start_exam(email, name).
	- On error: ask to double-check details.
	- On success: immediately call get_next_topic() to get the first topic.

	3. For EACH topic, conduct a dialogue:
	- Ask an open-ended question about the topic.
	- Move to the NEXT QUESTION (not next topic) when:
	a) The student gives a sufficiently complete answer — ask a follow-up to go deeper.
	b) The student says "I don't know" or similar — acknowledge and ask a different/simpler question on the SAME topic.
	c) It becomes clear the student's level won't change with more questions — then move to the next TOPIC.
	- Move to the NEXT TOPIC (call get_next_topic()) when:
	a) The student's knowledge on this topic is clearly established.
	b) The student has said "I don't know" to 2+ questions in a row on this topic.
	c) You have asked 3+ questions and have a clear picture of the student's level.

	4. CRITICAL: Do NOT show the score or end the exam until get_next_topic() returns "". Cover ALL topics.

	5. After all topics:
	- Show the student their score (0-10) and feedback (strengths + what to improve).
	- Call end_exam(email, score) with the EXACT numeric score you stated.
	- Scoring guide:
	* 9-10: Deep, accurate, detailed answers on all topics.
	* 7-8: Good understanding, minor gaps.
	* 5-6: Partial understanding, significant gaps.
	* 3-4: Mostly "I don't know", very shallow answers.
	* 0-2: No meaningful answers at all.

	6. Be encouraging but STRICT and objective. "I don't know" lowers the score significantly.
	7. Match the student's language (Ukrainian or English).
	8. Never add meta-comments in parentheses. Speak naturally."""


	def _extract_first_json(s: str) -> str:
	"""Extract the first valid JSON object from a string."""
	depth = 0
	start = None
	for i, c in enumerate(s):
	if c == "{":
	if start is None:
	start = i
	depth += 1
	elif c == "}":
	depth -= 1
	if depth == 0 and start is not None:
	candidate = s[start:i+1]
	try:
	json.loads(candidate)
	return candidate
	except (json.JSONDecodeError, ValueError):
	start = None
	return "{}"


	def _parse_leaked_calls(text: str) -> list[tuple[str, str]]:
	    """Find tool calls that the model wrote out as plain text.

	    Recognises ``<function=NAME`` followed by optional separators and a JSON
	    argument object, e.g. ``<function=f>{...}</function>``,
	    ``<function=f{...}>``, ``<function=f({...})`` or ``<function=f, {...}``.

	    Args:
	        text: Model output (or an API error message) to scan.

	    Returns:
	        ``(name, arguments_json)`` pairs in order of appearance. The
	        arguments string is ``"{}"`` when no valid JSON object can be
	        decoded after the opening brace.
	    """
	    # ">" is accepted as a separator so the "<function=NAME>{...}" form is
	    # matched as well; group 2 pins the position of the opening brace.
	    pattern = re.compile(r"<function=(\w+)[,\s(>]*(\{)")
	    return [
	        (m.group(1), _extract_first_json(text[m.start(2):]))
	        for m in pattern.finditer(text)
	    ]


	class ExaminerAgent:
	    """Conversational examiner driven by a Llama model served through Groq.

	    Two records of the conversation are kept side by side:

	    * ``messages`` -- the chat transcript (user, assistant and tool
	      messages) that is re-sent to the model on every request;
	    * ``history`` -- a timestamped log of user messages, tool calls and
	      examiner replies, which is handed to ``end_exam`` when the exam is
	      saved.

	    ``student_email`` is set once ``start_exam`` succeeds and
	    ``exam_finished`` becomes ``True`` after an ``end_exam`` tool call.
	    """

	    # "<number> out of 10" or "<number>/10", with optional spaces around the
	    # separator; the trailing \b keeps "1/100" from matching.
	    _SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*(?:out of|/)\s*10\b")

	    def __init__(self, api_key: str):
	        """Create a client pointed at Groq's OpenAI-compatible endpoint."""
	        self.client = OpenAI(
	            api_key=api_key,
	            base_url="https://api.groq.com/openai/v1",
	        )
	        self.messages: list[dict] = []
	        self.history: list[Message] = []
	        self.student_email = ""
	        self.exam_finished = False

	    def _log(self, role: str, content: str):
	        """Append one timestamped entry to the exam history."""
	        self.history.append({
	            "role": role, "content": content,
	            "datetime": datetime.now().isoformat(timespec="seconds"),
	        })

	    @staticmethod
	    def _coerce_score(value) -> float:
	        """Convert a tool-call score to float; 0.0 if missing or not numeric."""
	        try:
	            return float(value)
	        except (TypeError, ValueError):
	            return 0.0

	    def _score_from_history(self) -> "float | None":
	        """Return the latest "<n> out of 10" / "<n>/10" stated by the examiner.

	        Only the last ten history entries are searched, and only examiner
	        replies (logged with role "system") are considered, so that a
	        student typing "10/10" cannot influence the saved grade. Returns
	        ``None`` when nothing matches.
	        """
	        for entry in reversed(self.history[-10:]):
	            if entry.get("role") != "system":
	                continue
	            matches = self._SCORE_RE.findall(entry.get("content", ""))
	            if matches:
	                return float(matches[-1])
	        return None

	    def _dispatch(self, name: str, arguments_str: str) -> str:
	        """Execute one tool call and return its result as a JSON string.

	        Args:
	            name: Tool name requested by the model.
	            arguments_str: Raw JSON arguments; empty, ``null``, malformed or
	                non-object values are treated as "no arguments".

	        Returns:
	            JSON text: ``{"topics": ...}``, ``{"topic": ...}``,
	            ``{"status": "saved"}`` or ``{"error": ...}``.
	        """
	        raw = (arguments_str or "").strip()
	        try:
	            inputs = json.loads(raw) if raw and raw not in ("null", "None") else {}
	        except json.JSONDecodeError:
	            inputs = {}
	        if not isinstance(inputs, dict):
	            # e.g. a JSON list or number: there are no named arguments to read.
	            inputs = {}

	        self._log("tool_call", f"{name}({arguments_str})")

	        if name == "start_exam":
	            email = inputs.get("email")
	            student_name = inputs.get("name")
	            if not email or not student_name:
	                # Report the problem to the model instead of raising KeyError,
	                # so it can ask the student again and retry.
	                return json.dumps({"error": "start_exam requires both 'email' and 'name'"})
	            try:
	                topics = start_exam(email, student_name)
	                set_topic_queue(topics)
	            except ValueError as e:
	                return json.dumps({"error": str(e)})
	            self.student_email = email
	            return json.dumps({"topics": topics})

	        if name == "get_next_topic":
	            return json.dumps({"topic": get_next_topic()})

	        if name == "end_exam":
	            score = self._coerce_score(inputs.get("score"))
	            # Fallback: recover the score from the examiner's own words when
	            # the tool call carried none (or zero).
	            if not score:
	                score = self._score_from_history() or 0.0
	            if self.student_email:
	                end_exam(self.student_email, float(score), self.history)
	            self.exam_finished = True
	            return json.dumps({"status": "saved"})

	        return json.dumps({"error": "unknown tool"})

	    def _inject_leaked(self, leaked: list[tuple[str, str]]):
	        """Execute leaked tool calls and inject results into message history.

	        Each call is recorded as a synthetic assistant ``tool_calls`` message
	        followed by the matching ``tool`` result, linked by a generated id.
	        """
	        for name, args_str in leaked:
	            result = self._dispatch(name, args_str)
	            fake_id = f"call_{uuid.uuid4().hex[:8]}"
	            self.messages.append({
	                "role": "assistant",
	                "content": "",
	                "tool_calls": [{"id": fake_id, "type": "function",
	                                "function": {"name": name, "arguments": args_str}}],
	            })
	            self.messages.append({"role": "tool", "tool_call_id": fake_id, "content": result})

	    def _run_turn(self) -> str:
	        """Query the model until it produces a plain-text reply and return it.

	        Structured tool calls are dispatched and their results appended to the
	        transcript; tool calls leaked as text (in the reply or in an API error
	        message) are parsed, executed and injected the same way. Exceptions
	        from the API that contain no leaked call are re-raised.
	        """
	        # NOTE(review): there is no cap on the number of rounds; a model that
	        # keeps requesting tools would keep this loop running.
	        while True:
	            try:
	                response = self.client.chat.completions.create(
	                    model="llama-3.3-70b-versatile",
	                    max_tokens=1024,
	                    tools=TOOLS,
	                    tool_choice="auto",
	                    messages=[
	                        {"role": "system", "content": SYSTEM_PROMPT},
	                        *self.messages,
	                    ],
	                )
	            except Exception as e:
	                # The error text may embed the model's malformed tool call.
	                leaked = _parse_leaked_calls(str(e))
	                if not leaked:
	                    raise
	                if self.messages and self.messages[-1]["role"] == "assistant":
	                    bad = self.messages.pop()
	                    clean = re.sub(r"<function=.*", "", bad.get("content") or "", flags=re.DOTALL).strip()
	                    if clean:
	                        self.messages.append({"role": "assistant", "content": clean})
	                self._inject_leaked(leaked)
	                continue

	            msg = response.choices[0].message
	            text = msg.content or ""

	            if msg.tool_calls:
	                # Decide on the presence of tool calls alone: storing an
	                # assistant message with tool_calls but no tool results
	                # (e.g. when finish_reason is not "tool_calls") would leave
	                # the transcript invalid for the next request.
	                self.messages.append({
	                    "role": "assistant",
	                    "content": text,
	                    "tool_calls": [
	                        {"id": tc.id, "type": "function",
	                         "function": {"name": tc.function.name, "arguments": tc.function.arguments}}
	                        for tc in msg.tool_calls
	                    ],
	                })
	                for tc in msg.tool_calls:
	                    result = self._dispatch(tc.function.name, tc.function.arguments)
	                    self.messages.append({
	                        "role": "tool",
	                        "tool_call_id": tc.id,
	                        "content": result,
	                    })
	                continue

	            leaked = _parse_leaked_calls(text)
	            if leaked:
	                # Keep only the prose before the leaked call, then replay the
	                # call as a proper tool exchange.
	                clean = re.sub(r"<function=.*", "", text, flags=re.DOTALL).strip()
	                if clean:
	                    self.messages.append({"role": "assistant", "content": clean})
	                self._inject_leaked(leaked)
	                continue

	            self.messages.append({"role": "assistant", "content": text})
	            self._log("system", text)
	            return text

	    def start(self) -> str:
	        """Reset all exam state and return the examiner's opening message."""
	        self.history = []
	        self.student_email = ""
	        self.exam_finished = False
	        set_topic_queue([])
	        self.messages = [{"role": "user", "content": "Hello, I am ready for my exam."}]
	        return self._run_turn()

	    def chat(self, user_message: str) -> str:
	        """Record the student's message and return the examiner's reply."""
	        self._log("user", user_message)
	        self.messages.append({"role": "user", "content": user_message})
	        return self._run_turn()