File size: 4,992 Bytes
433f3f1
 
 
 
 
 
dc3dc12
433f3f1
 
10940f4
433f3f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc3dc12
433f3f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc3dc12
 
 
 
d42c36e
 
 
 
 
 
 
 
 
 
 
dc3dc12
 
433f3f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc3dc12
433f3f1
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import json
from config import OPENAI_CHAT_MODEL
from openai import OpenAI
import json
from supabase_ie import load_user_info
from Classification_parameters import TOPIC_DESCRIPTIONS, RESPONSE_MODE_DESCRIPTIONS, TRACKED_FIELDS, FIELD_DESCRIPTIONS, ALLOWED_STORY_TOPICS
# Shared OpenAI client; reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

# Comma-separated closed set of story-topic labels, interpolated into the
# classification prompt so the model only picks from known labels.
allowed_str = ", ".join(ALLOWED_STORY_TOPICS)

# ==============================
# LLM Classification
# ==============================
def _strip_json_fences(text: str) -> str:
    """Remove Markdown code fences (``` or ```json) that chat models often wrap around JSON."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (which may carry a language tag like "json").
        newline_idx = cleaned.find("\n")
        cleaned = cleaned[newline_idx + 1:] if newline_idx != -1 else ""
        # Drop a trailing closing fence if present.
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def call_classification_llm(query: str, recent_history: list, user_info: dict, missing_fields: list):
    """
    Make a single OpenAI chat call that analyzes the latest user query.

    The model is instructed to:
    - Extract user info ONLY from the latest query (one entry per TRACKED_FIELDS;
      list-valued fields get list schemas, everything else a string).
    - Classify topic, response_mode, emotion using query + recent history.
    - Pick topic_for_story from the closed ALLOWED_STORY_TOPICS set (or "none").
    - Detect if an external news fetch is required
      (needs_news_fetch, news_topic, news_question).

    Args:
        query: Latest user message.
        recent_history: Recent conversation turns, provided to the model as context.
        user_info: Already-known user profile fields (context only).
        missing_fields: Tracked fields not yet filled in (context only).

    Returns:
        dict parsed from the model's JSON output. If the output is missing or
        not valid JSON (even after stripping Markdown code fences), returns a
        safe fallback dict with empty/neutral values.
    """

    # Build schema but treat lists/JSON specially so the model emits the
    # correct container type for each tracked field.
    schema_fields = {}
    for field in TRACKED_FIELDS:
        if field == "kids":
            schema_fields[field] = [{"name": "", "age": ""}]
        elif field in ["hobbies", "interests", "sports_played", "sports_watched"]:
            schema_fields[field] = []
        else:
            schema_fields[field] = ""

    schema_json = json.dumps({
        "extracted_user_info": schema_fields,
        "topic": "",
        "response_mode": "",
        "emotion": "",
        "topic_for_story": "",
        "needs_news_fetch": False,
        "news_topic": [],
        "news_question": ""
    }, indent=2)

    system_prompt = f"""
    You are Socrates, analyzing a new user query.

    You must return your response strictly in JSON, with no extra text.
    The JSON must follow this schema exactly:

    {schema_json}

    Rules:
    - extracted_user_info:
      * Derive ONLY from the latest user query (ignore history & assistant replies).
      * kids: must be an array of objects with "name" and "age".
        Example: [{{"name":"Demian","age":12}}, {{"name":"Selene","age":5}}]
      * hobbies, interests, sports_played, sports_watched: must be an array of strings.
        Example: ["surfing","football"]
      * all other fields: strings ("" if unknown).
      * Leave fields empty if not present in the latest query.

    - topic, response_mode, emotion:
      * Use both the latest query AND recent history for classification.
      * topic: one of {list(TOPIC_DESCRIPTIONS.keys())}
      * response_mode: one of {list(RESPONSE_MODE_DESCRIPTIONS.keys())}
      * emotion: one of ["curious","happy","sad","angry","neutral"]
    
    - topic_for_story:
      * Choose exactly one label from this list (closed set):
        [{allowed_str}]
      * Map common paraphrases and synonyms to the correct label:
      luck, lucky, bad luck, good luck, fate, chance -> fortune
      courage, bravery, grit, resilience, resolve -> mental_strength
      bullying, bullies, being mocked/harassed -> bullying
      fear, phobia, anxiety, panic -> phobias
      friend, friendship, companions -> friendship
      love, romance, relationship, breakup -> love
      sex, intimacy, desire -> sex
      meaning, purpose, "what is the point" -> meaning_of_life
      confidence, self-confidence, self-belief -> confidence
      * If a synonym clearly matches, DO NOT return "none"
      * Return "none" if no fitting label is clearly supported.
      * Never invent new labels.

    - needs_news_fetch:
      * true if the user is clearly asking for news, current events, or external information lookup.
      * false otherwise.

    - news_topic:
      * If needs_news_fetch=true → return a JSON array of 2–3 concise topic labels (e.g. ["US Open", "men's final", "tennis"]).
      * If false → return [].

    - news_question:
      * If needs_news_fetch=true → restate the user’s request as a clear research question.
        Example: "Who is going to play the US Open final?"
      * If false → return "".

    - Do not add any explanation or text outside the JSON.
    """

    # Context beyond the query is passed as assistant-role messages so the
    # model can use it for classification without treating it as user input.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
        {"role": "assistant", "content": f"Recent history: {recent_history}"},
        {"role": "assistant", "content": f"Known user info: {user_info}"},
        {"role": "assistant", "content": f"Missing fields to track: {missing_fields}"}
    ]

    # Low temperature: this is a classification/extraction task, not generation.
    resp = client.chat.completions.create(
        model=OPENAI_CHAT_MODEL,
        messages=messages,
        temperature=0.2
    )

    # content may be None (e.g. refusals / filtered responses); normalize to ""
    # so the failure path below handles it instead of raising TypeError.
    raw_output = resp.choices[0].message.content or ""

    try:
        # Strip Markdown code fences first — models often wrap JSON in ```json ... ```.
        parsed = json.loads(_strip_json_fences(raw_output))
        return parsed
    except json.JSONDecodeError:
        print("⚠️ Could not parse LLM output as JSON:", raw_output)
        return {
            "extracted_user_info": schema_fields,
            "topic": "",
            "response_mode": "",
            "emotion": "",
            "topic_for_story": "none",
            "needs_news_fetch": False,
            "news_topic": [],
            "news_question": ""
        }