# Socrates_docker / Agent_Chat_Classification_Helper.py
# (Hugging Face Space page header: author alesamodio, "socratic stories feature", commit d42c36e)
import os
import json
from config import OPENAI_CHAT_MODEL
from openai import OpenAI
import json
from supabase_ie import load_user_info
from Classification_parameters import TOPIC_DESCRIPTIONS, RESPONSE_MODE_DESCRIPTIONS, TRACKED_FIELDS, FIELD_DESCRIPTIONS, ALLOWED_STORY_TOPICS
client = OpenAI()
allowed_str = ", ".join(ALLOWED_STORY_TOPICS)
# ==============================
# LLM Classification
# ==============================
def call_classification_llm(query: str, recent_history: list, user_info: dict, missing_fields: list):
"""
Calls the OpenAI model once to:
- Extract user info ONLY from the latest query
- Classify topic, response_mode, emotion using query + recent history
- Detect if external news fetch is required (needs_news_fetch, news_topic, news_question)
Returns a JSON object.
"""
# Build schema but treat lists/JSON specially
schema_fields = {}
for field in TRACKED_FIELDS:
if field == "kids":
schema_fields[field] = [{"name": "", "age": ""}]
elif field in ["hobbies", "interests", "sports_played", "sports_watched"]:
schema_fields[field] = []
else:
schema_fields[field] = ""
schema_json = json.dumps({
"extracted_user_info": schema_fields,
"topic": "",
"response_mode": "",
"emotion": "",
"topic_for_story": "",
"needs_news_fetch": False,
"news_topic": [],
"news_question": ""
}, indent=2)
system_prompt = f"""
You are Socrates, analyzing a new user query.
You must return your response strictly in JSON, with no extra text.
The JSON must follow this schema exactly:
{schema_json}
Rules:
- extracted_user_info:
* Derive ONLY from the latest user query (ignore history & assistant replies).
* kids: must be an array of objects with "name" and "age".
Example: [{{"name":"Demian","age":12}}, {{"name":"Selene","age":5}}]
* hobbies, interests, sports_played, sports_watched: must be an array of strings.
Example: ["surfing","football"]
* all other fields: strings ("" if unknown).
* Leave fields empty if not present in the latest query.
- topic, response_mode, emotion:
* Use both the latest query AND recent history for classification.
* topic: one of {list(TOPIC_DESCRIPTIONS.keys())}
* response_mode: one of {list(RESPONSE_MODE_DESCRIPTIONS.keys())}
* emotion: one of ["curious","happy","sad","angry","neutral"]
- topic_for_story:
* Choose exactly one label from this list (closed set):
[{allowed_str}]
* Map common paraphrases and synonyms to the correct label:
luck, lucky, bad luck, good luck, fate, chance -> fortune
courage, bravery, grit, resilience, resolve -> mental_strength
bullying, bullies, being mocked/harassed -> bullying
fear, phobia, anxiety, panic -> phobias
friend, friendship, companions -> friendship
love, romance, relationship, breakup -> love
sex, intimacy, desire -> sex
meaning, purpose, "what is the point" -> meaning_of_life
confidence, self-confidence, self-belief -> confidence
* If a synonym clearly matches, DO NOT return "none"
* Return "none" if no fitting label is clearly supported.
* Never invent new labels.
- needs_news_fetch:
* true if the user is clearly asking for news, current events, or external information lookup.
* false otherwise.
- news_topic:
* If needs_news_fetch=true → return a JSON array of 2–3 concise topic labels (e.g. ["US Open", "men's final", "tennis"]).
* If false → return [].
- news_question:
* If needs_news_fetch=true → restate the user’s request as a clear research question.
Example: "Who is going to play the US Open final?"
* If false → return "".
- Do not add any explanation or text outside the JSON.
"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": query},
{"role": "assistant", "content": f"Recent history: {recent_history}"},
{"role": "assistant", "content": f"Known user info: {user_info}"},
{"role": "assistant", "content": f"Missing fields to track: {missing_fields}"}
]
resp = client.chat.completions.create(
model=OPENAI_CHAT_MODEL,
messages=messages,
temperature=0.2
)
raw_output = resp.choices[0].message.content
try:
parsed = json.loads(raw_output)
return parsed
except json.JSONDecodeError:
print("⚠️ Could not parse LLM output as JSON:", raw_output)
return {
"extracted_user_info": schema_fields,
"topic": "",
"response_mode": "",
"emotion": "",
"topic_for_story": "none",
"needs_news_fetch": False,
"news_topic": [],
"news_question": ""
}