# Socrates_docker / Agent_Chat_Classification_Helper.py
# (Hugging Face Space page header: author alesamodio, "socratic stories feature", commit d42c36e)
import os
import json
from config import OPENAI_CHAT_MODEL
from openai import OpenAI
import json
from supabase_ie import load_user_info
from Classification_parameters import TOPIC_DESCRIPTIONS, RESPONSE_MODE_DESCRIPTIONS, TRACKED_FIELDS, FIELD_DESCRIPTIONS, ALLOWED_STORY_TOPICS
client = OpenAI()
allowed_str = ", ".join(ALLOWED_STORY_TOPICS)
# ==============================
# LLM Classification
# ==============================
def call_classification_llm(query: str, recent_history: list, user_info: dict, missing_fields: list):
"""
Calls the OpenAI model once to:
- Extract user info ONLY from the latest query
- Classify topic, response_mode, emotion using query + recent history
- Detect if external news fetch is required (needs_news_fetch, news_topic, news_question)
Returns a JSON object.
"""
# Build schema but treat lists/JSON specially
schema_fields = {}
for field in TRACKED_FIELDS:
if field == "kids":
schema_fields[field] = [{"name": "", "age": ""}]
elif field in ["hobbies", "interests", "sports_played", "sports_watched"]:
schema_fields[field] = []
else:
schema_fields[field] = ""
schema_json = json.dumps({
"extracted_user_info": schema_fields,
"topic": "",
"response_mode": "",
"emotion": "",
"topic_for_story": "",
"needs_news_fetch": False,
"news_topic": [],
"news_question": ""
}, indent=2)
system_prompt = f"""
You are Socrates, analyzing a new user query.
You must return your response strictly in JSON, with no extra text.
The JSON must follow this schema exactly:
{schema_json}
Rules:
- extracted_user_info:
* Derive ONLY from the latest user query (ignore history & assistant replies).
* kids: must be an array of objects with "name" and "age".
Example: [{{"name":"Demian","age":12}}, {{"name":"Selene","age":5}}]
* hobbies, interests, sports_played, sports_watched: must be an array of strings.
Example: ["surfing","football"]
* all other fields: strings ("" if unknown).
* Leave fields empty if not present in the latest query.
- topic, response_mode, emotion:
* Use both the latest query AND recent history for classification.
* topic: one of {list(TOPIC_DESCRIPTIONS.keys())}
* response_mode: one of {list(RESPONSE_MODE_DESCRIPTIONS.keys())}
* emotion: one of ["curious","happy","sad","angry","neutral"]
- topic_for_story:
* Choose exactly one label from this list (closed set):
[{allowed_str}]
* Map common paraphrases and synonyms to the correct label:
luck, lucky, bad luck, good luck, fate, chance -> fortune
courage, bravery, grit, resilience, resolve -> mental_strength
bullying, bullies, being mocked/harassed -> bullying
fear, phobia, anxiety, panic -> phobias
friend, friendship, companions -> friendship
love, romance, relationship, breakup -> love
sex, intimacy, desire -> sex
meaning, purpose, "what is the point" -> meaning_of_life
confidence, self-confidence, self-belief -> confidence
* If a synonym clearly matches, DO NOT return "none"
* Return "none" if no fitting label is clearly supported.
* Never invent new labels.
- needs_news_fetch:
* true if the user is clearly asking for news, current events, or external information lookup.
* false otherwise.
- news_topic:
* If needs_news_fetch=true → return a JSON array of 2–3 concise topic labels (e.g. ["US Open", "men's final", "tennis"]).
* If false → return [].
- news_question:
* If needs_news_fetch=true → restate the user’s request as a clear research question.
Example: "Who is going to play the US Open final?"
* If false → return "".
- Do not add any explanation or text outside the JSON.
"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": query},
{"role": "assistant", "content": f"Recent history: {recent_history}"},
{"role": "assistant", "content": f"Known user info: {user_info}"},
{"role": "assistant", "content": f"Missing fields to track: {missing_fields}"}
]
resp = client.chat.completions.create(
model=OPENAI_CHAT_MODEL,
messages=messages,
temperature=0.2
)
raw_output = resp.choices[0].message.content
try:
parsed = json.loads(raw_output)
return parsed
except json.JSONDecodeError:
print("⚠️ Could not parse LLM output as JSON:", raw_output)
return {
"extracted_user_info": schema_fields,
"topic": "",
"response_mode": "",
"emotion": "",
"topic_for_story": "none",
"needs_news_fetch": False,
"news_topic": [],
"news_question": ""
}