import os
import json

from openai import OpenAI

from config import OPENAI_CHAT_MODEL
from supabase_ie import load_user_info
from Classification_parameters import (
    TOPIC_DESCRIPTIONS,
    RESPONSE_MODE_DESCRIPTIONS,
    TRACKED_FIELDS,
    FIELD_DESCRIPTIONS,
    ALLOWED_STORY_TOPICS,
)

client = OpenAI()

# Closed set of story-topic labels, rendered once for the prompt.
allowed_str = ", ".join(ALLOWED_STORY_TOPICS)


# ==============================
# LLM Classification
# ==============================

def call_classification_llm(query: str, recent_history: list, user_info: dict, missing_fields: list) -> dict:
    """
    Calls the OpenAI model once to:
    - Extract user info ONLY from the latest query
    - Classify topic, response_mode, emotion using query + recent history
    - Detect if external news fetch is required (needs_news_fetch, news_topic, news_question)

    Args:
        query: The latest user message (sole source for extracted_user_info).
        recent_history: Recent conversation turns, passed to the model as context
            for topic/response_mode/emotion classification only.
        user_info: Already-known user profile fields, shown to the model as context.
        missing_fields: Profile fields still untracked, shown to the model as context.

    Returns:
        A dict matching the schema below. If the model's output cannot be parsed
        as JSON (even after stripping markdown code fences), a safe empty
        fallback with the same keys is returned instead of raising.
    """
    # Build the extraction schema: list-valued fields get list templates so the
    # model returns JSON arrays; everything else defaults to an empty string.
    schema_fields = {}
    for field in TRACKED_FIELDS:
        if field == "kids":
            schema_fields[field] = [{"name": "", "age": ""}]
        elif field in ["hobbies", "interests", "sports_played", "sports_watched"]:
            schema_fields[field] = []
        else:
            schema_fields[field] = ""

    schema_json = json.dumps({
        "extracted_user_info": schema_fields,
        "topic": "",
        "response_mode": "",
        "emotion": "",
        "topic_for_story": "",
        "needs_news_fetch": False,
        "news_topic": [],
        "news_question": ""
    }, indent=2)

    system_prompt = f"""
You are Socrates, analyzing a new user query.
You must return your response strictly in JSON, with no extra text.

The JSON must follow this schema exactly:
{schema_json}

Rules:
- extracted_user_info:
  * Derive ONLY from the latest user query (ignore history & assistant replies).
  * kids: must be an array of objects with "name" and "age".
    Example: [{{"name":"Demian","age":12}}, {{"name":"Selene","age":5}}]
  * hobbies, interests, sports_played, sports_watched: must be an array of strings.
    Example: ["surfing","football"]
  * all other fields: strings ("" if unknown).
  * Leave fields empty if not present in the latest query.
- topic, response_mode, emotion:
  * Use both the latest query AND recent history for classification.
  * topic: one of {list(TOPIC_DESCRIPTIONS.keys())}
  * response_mode: one of {list(RESPONSE_MODE_DESCRIPTIONS.keys())}
  * emotion: one of ["curious","happy","sad","angry","neutral"]
- topic_for_story:
  * Choose exactly one label from this list (closed set): [{allowed_str}]
  * Map common paraphrases and synonyms to the correct label:
    luck, lucky, bad luck, good luck, fate, chance -> fortune
    courage, bravery, grit, resilience, resolve -> mental_strength
    bullying, bullies, being mocked/harassed -> bullying
    fear, phobia, anxiety, panic -> phobias
    friend, friendship, companions -> friendship
    love, romance, relationship, breakup -> love
    sex, intimacy, desire -> sex
    meaning, purpose, "what is the point" -> meaning_of_life
    confidence, self-confidence, self-belief -> confidence
  * If a synonym clearly matches, DO NOT return "none"
  * Return "none" if no fitting label is clearly supported.
  * Never invent new labels.
- needs_news_fetch:
  * true if the user is clearly asking for news, current events, or external information lookup.
  * false otherwise.
- news_topic:
  * If needs_news_fetch=true → return a JSON array of 2–3 concise topic labels
    (e.g. ["US Open", "men's final", "tennis"]).
  * If false → return [].
- news_question:
  * If needs_news_fetch=true → restate the user’s request as a clear research question.
    Example: "Who is going to play the US Open final?"
  * If false → return "".
- Do not add any explanation or text outside the JSON.
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
        {"role": "assistant", "content": f"Recent history: {recent_history}"},
        {"role": "assistant", "content": f"Known user info: {user_info}"},
        {"role": "assistant", "content": f"Missing fields to track: {missing_fields}"}
    ]

    resp = client.chat.completions.create(
        model=OPENAI_CHAT_MODEL,
        messages=messages,
        temperature=0.2  # low temperature: classification should be near-deterministic
    )

    raw_output = resp.choices[0].message.content

    # Models frequently wrap JSON in markdown fences (```json ... ```) despite
    # instructions; strip them before parsing so we don't lose a valid payload.
    cleaned = raw_output.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`").strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].lstrip()

    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        print("⚠️ Could not parse LLM output as JSON:", raw_output)
        # Safe fallback with the same keys the caller expects.
        return {
            "extracted_user_info": schema_fields,
            "topic": "",
            "response_mode": "",
            "emotion": "",
            "topic_for_story": "none",
            "needs_news_fetch": False,
            "news_topic": [],
            "news_question": ""
        }