Spaces:
Sleeping
Sleeping
backend fix
Browse files- app.py +139 -75
- config.py +1 -1
- faiss_service.py +132 -6
- llm_service.py +607 -334
- routes/auth_routes.py +1 -1
- routes/matching_routes.py +98 -1
app.py
CHANGED
|
@@ -1,18 +1,45 @@
|
|
| 1 |
APP_BUILD = "HF-BUILD-2025-12-15-01"
|
| 2 |
print("β
RUNNING APP BUILD:", APP_BUILD, "FILE:", __file__)
|
| 3 |
-
|
|
|
|
| 4 |
import os
|
| 5 |
import datetime
|
| 6 |
import traceback
|
|
|
|
| 7 |
from flask import Flask, jsonify, request
|
| 8 |
from flask_cors import CORS
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from config import (
|
| 11 |
-
SQL_DRIVER,
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
)
|
|
|
|
| 14 |
from models import db
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
def create_app():
|
| 18 |
app = Flask(__name__)
|
|
@@ -28,7 +55,7 @@ def create_app():
|
|
| 28 |
# ----------------------------
|
| 29 |
@app.before_request
|
| 30 |
def log_request_info():
|
| 31 |
-
print(f"\n{'='*60}")
|
| 32 |
print("π₯ INCOMING REQUEST:")
|
| 33 |
print(f" Time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 34 |
print(f" Method: {request.method}")
|
|
@@ -38,7 +65,7 @@ def create_app():
|
|
| 38 |
if request.user_agent:
|
| 39 |
print(f" User Agent: {request.user_agent.string[:80]}...")
|
| 40 |
print(f" Referrer: {request.referrer}")
|
| 41 |
-
print(f"{'='*60}")
|
| 42 |
|
| 43 |
# ----------------------------
|
| 44 |
# DB init
|
|
@@ -60,7 +87,7 @@ def create_app():
|
|
| 60 |
import routes as routes_module
|
| 61 |
print("β
DEBUG: Imported routes module")
|
| 62 |
|
| 63 |
-
# Get blueprints safely (
|
| 64 |
candidates = [
|
| 65 |
("auth_bp", getattr(routes_module, "auth_bp", None), "/api"),
|
| 66 |
("profiles_bp", getattr(routes_module, "profiles_bp", None), None),
|
|
@@ -69,8 +96,10 @@ def create_app():
|
|
| 69 |
("llm_bp", getattr(routes_module, "llm_bp", None), None),
|
| 70 |
]
|
| 71 |
|
| 72 |
-
print(
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
for name, bp, prefix in candidates:
|
| 76 |
if bp is None:
|
|
@@ -95,92 +124,126 @@ def create_app():
|
|
| 95 |
print(f"β DEBUG: Failed to import routes or register blueprints: {e}")
|
| 96 |
traceback.print_exc()
|
| 97 |
|
| 98 |
-
#
|
| 99 |
# Debug endpoints (always available)
|
| 100 |
-
#
|
| 101 |
-
@app.get("/api/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
@app.get("/health")
|
|
|
|
| 103 |
def health():
|
| 104 |
-
#
|
| 105 |
-
faiss_loaded = False
|
| 106 |
-
knowledge_loaded = False
|
| 107 |
llm_mode = "offline-fallback"
|
| 108 |
-
|
| 109 |
try:
|
| 110 |
-
from llm_service import CHAIN_BATCH
|
| 111 |
if CHAIN_BATCH is not None:
|
| 112 |
llm_mode = "openai"
|
| 113 |
except Exception:
|
| 114 |
pass
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
knowledge_loaded = knowledge is not None and hasattr(knowledge, "indices") and len(knowledge.indices) > 0
|
| 120 |
-
except Exception:
|
| 121 |
-
pass
|
| 122 |
-
|
| 123 |
-
return jsonify({
|
| 124 |
-
"status": "ok",
|
| 125 |
-
"huggingface": bool(IS_HUGGING_FACE),
|
| 126 |
-
"llm": llm_mode,
|
| 127 |
-
"has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
|
| 128 |
-
"db": {"server": SQL_SERVER, "database": SQL_DB},
|
| 129 |
-
"faiss_loaded": faiss_loaded,
|
| 130 |
-
"knowledge_base_loaded": knowledge_loaded,
|
| 131 |
-
"blueprints": blueprint_status
|
| 132 |
-
})
|
| 133 |
-
|
| 134 |
-
@app.get("/api/_routes")
|
| 135 |
-
@app.get("/debug/routes")
|
| 136 |
-
def list_routes():
|
| 137 |
-
routes_list = []
|
| 138 |
-
for rule in app.url_map.iter_rules():
|
| 139 |
-
routes_list.append({
|
| 140 |
-
"endpoint": rule.endpoint,
|
| 141 |
-
"methods": sorted(list(rule.methods)),
|
| 142 |
-
"rule": str(rule)
|
| 143 |
-
})
|
| 144 |
|
| 145 |
-
#
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
return jsonify(
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
|
|
|
|
|
|
|
|
|
| 154 |
@app.get("/")
|
| 155 |
def home():
|
| 156 |
-
return jsonify(
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
"
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
#
|
| 171 |
# Error handlers
|
| 172 |
-
#
|
| 173 |
@app.errorhandler(404)
|
| 174 |
def not_found(error):
|
| 175 |
return jsonify({"error": "Endpoint not found", "path": request.path}), 404
|
| 176 |
|
| 177 |
@app.errorhandler(405)
|
| 178 |
def method_not_allowed(error):
|
| 179 |
-
return
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
@app.errorhandler(500)
|
| 186 |
def internal_error(error):
|
|
@@ -189,10 +252,11 @@ def create_app():
|
|
| 189 |
return app
|
| 190 |
|
| 191 |
|
| 192 |
-
app = create_app()
|
| 193 |
-
|
| 194 |
if __name__ == "__main__":
|
| 195 |
-
|
|
|
|
|
|
|
| 196 |
print("π Flask server starting...")
|
| 197 |
-
print(f"{'='*60}")
|
|
|
|
| 198 |
app.run(host="0.0.0.0", port=5000, debug=True)
|
|
|
|
| 1 |
APP_BUILD = "HF-BUILD-2025-12-15-01"
|
| 2 |
print("β
RUNNING APP BUILD:", APP_BUILD, "FILE:", __file__)
|
| 3 |
+
|
| 4 |
+
# app.py (HF-safe + corrected health + debug routes)
|
| 5 |
import os
|
| 6 |
import datetime
|
| 7 |
import traceback
|
| 8 |
+
|
| 9 |
from flask import Flask, jsonify, request
|
| 10 |
from flask_cors import CORS
|
| 11 |
|
| 12 |
+
# FAISS / knowledge
|
| 13 |
+
from faiss_service import FAISS_INDEX, TEXT_CHUNKS, HAS_FAISS, knowledge
|
| 14 |
+
|
| 15 |
+
# Config
|
| 16 |
from config import (
|
| 17 |
+
SQL_DRIVER,
|
| 18 |
+
SQL_SERVER,
|
| 19 |
+
SQL_DB,
|
| 20 |
+
SQL_TRUSTED,
|
| 21 |
+
SQL_USER,
|
| 22 |
+
SQL_PASSWORD,
|
| 23 |
+
SQL_PORT,
|
| 24 |
+
SQL_ENCRYPT,
|
| 25 |
+
SQL_TRUSTCERT,
|
| 26 |
+
IS_HUGGING_FACE,
|
| 27 |
+
PROGRESS_TBL, # make sure this exists in config.py
|
| 28 |
)
|
| 29 |
+
|
| 30 |
from models import db
|
| 31 |
|
| 32 |
+
# LLM / chain imports (safe if module not present)
|
| 33 |
+
try:
|
| 34 |
+
from llm_service import CHAIN_BATCH
|
| 35 |
+
try:
|
| 36 |
+
from llm_service import llm_chain
|
| 37 |
+
except ImportError:
|
| 38 |
+
llm_chain = None
|
| 39 |
+
except ImportError:
|
| 40 |
+
CHAIN_BATCH = None
|
| 41 |
+
llm_chain = None
|
| 42 |
+
|
| 43 |
|
| 44 |
def create_app():
|
| 45 |
app = Flask(__name__)
|
|
|
|
| 55 |
# ----------------------------
|
| 56 |
@app.before_request
|
| 57 |
def log_request_info():
|
| 58 |
+
print(f"\n{'=' * 60}")
|
| 59 |
print("π₯ INCOMING REQUEST:")
|
| 60 |
print(f" Time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 61 |
print(f" Method: {request.method}")
|
|
|
|
| 65 |
if request.user_agent:
|
| 66 |
print(f" User Agent: {request.user_agent.string[:80]}...")
|
| 67 |
print(f" Referrer: {request.referrer}")
|
| 68 |
+
print(f"{'=' * 60}")
|
| 69 |
|
| 70 |
# ----------------------------
|
| 71 |
# DB init
|
|
|
|
| 87 |
import routes as routes_module
|
| 88 |
print("β
DEBUG: Imported routes module")
|
| 89 |
|
| 90 |
+
# Get blueprints safely (None means missing)
|
| 91 |
candidates = [
|
| 92 |
("auth_bp", getattr(routes_module, "auth_bp", None), "/api"),
|
| 93 |
("profiles_bp", getattr(routes_module, "profiles_bp", None), None),
|
|
|
|
| 96 |
("llm_bp", getattr(routes_module, "llm_bp", None), None),
|
| 97 |
]
|
| 98 |
|
| 99 |
+
print(
|
| 100 |
+
"β
DEBUG: Blueprint objects (None means failed):",
|
| 101 |
+
[bp.name if bp else None for _, bp, _ in candidates],
|
| 102 |
+
)
|
| 103 |
|
| 104 |
for name, bp, prefix in candidates:
|
| 105 |
if bp is None:
|
|
|
|
| 124 |
print(f"β DEBUG: Failed to import routes or register blueprints: {e}")
|
| 125 |
traceback.print_exc()
|
| 126 |
|
| 127 |
+
# ------------------------------------------------------------------
|
| 128 |
# Debug endpoints (always available)
|
| 129 |
+
# ------------------------------------------------------------------
|
| 130 |
+
@app.get("/api/_routes")
|
| 131 |
+
@app.get("/debug/routes")
|
| 132 |
+
def list_routes():
|
| 133 |
+
routes_list = []
|
| 134 |
+
for rule in app.url_map.iter_rules():
|
| 135 |
+
routes_list.append(
|
| 136 |
+
{
|
| 137 |
+
"endpoint": rule.endpoint,
|
| 138 |
+
"methods": sorted(list(rule.methods)),
|
| 139 |
+
"rule": str(rule),
|
| 140 |
+
}
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
has_double_api = any(r["rule"].startswith("/api/api/") for r in routes_list)
|
| 144 |
+
|
| 145 |
+
return jsonify(
|
| 146 |
+
{
|
| 147 |
+
"count": len(routes_list),
|
| 148 |
+
"has_double_api_prefix": has_double_api,
|
| 149 |
+
"routes": sorted(routes_list, key=lambda x: x["rule"]),
|
| 150 |
+
}
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
# ------------------------------------------------------------------
|
| 154 |
+
# Health endpoint (both /health and /api/health to avoid breaking clients)
|
| 155 |
+
# ------------------------------------------------------------------
|
| 156 |
@app.get("/health")
|
| 157 |
+
@app.get("/api/health")
|
| 158 |
def health():
|
| 159 |
+
# LLM mode
|
|
|
|
|
|
|
| 160 |
llm_mode = "offline-fallback"
|
|
|
|
| 161 |
try:
|
|
|
|
| 162 |
if CHAIN_BATCH is not None:
|
| 163 |
llm_mode = "openai"
|
| 164 |
except Exception:
|
| 165 |
pass
|
| 166 |
|
| 167 |
+
# FAISS status
|
| 168 |
+
faiss_chunks = len(TEXT_CHUNKS) if TEXT_CHUNKS is not None else 0
|
| 169 |
+
faiss_loaded = bool(HAS_FAISS and FAISS_INDEX is not None and faiss_chunks > 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
# Knowledge base status
|
| 172 |
+
if knowledge is not None and hasattr(knowledge, "indices"):
|
| 173 |
+
knowledge_indices_count = len(getattr(knowledge, "indices", []))
|
| 174 |
+
knowledge_loaded = knowledge_indices_count > 0
|
| 175 |
+
else:
|
| 176 |
+
knowledge_indices_count = 0
|
| 177 |
+
knowledge_loaded = False
|
| 178 |
|
| 179 |
+
return jsonify(
|
| 180 |
+
{
|
| 181 |
+
"status": "ok",
|
| 182 |
+
"huggingface": bool(IS_HUGGING_FACE), # keep this line (as you requested)
|
| 183 |
+
"llm": llm_mode,
|
| 184 |
+
"has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
|
| 185 |
+
"db": {
|
| 186 |
+
"server": SQL_SERVER,
|
| 187 |
+
"database": SQL_DB,
|
| 188 |
+
"table": PROGRESS_TBL,
|
| 189 |
+
},
|
| 190 |
+
"faiss_available": HAS_FAISS,
|
| 191 |
+
"faiss_loaded": faiss_loaded,
|
| 192 |
+
"faiss_chunks": faiss_chunks,
|
| 193 |
+
"knowledge_base_loaded": knowledge_loaded,
|
| 194 |
+
"knowledge_indices": knowledge_indices_count,
|
| 195 |
+
"blueprints": blueprint_status,
|
| 196 |
+
}
|
| 197 |
+
)
|
| 198 |
|
| 199 |
+
# ------------------------------------------------------------------
|
| 200 |
+
# Home endpoint
|
| 201 |
+
# ------------------------------------------------------------------
|
| 202 |
@app.get("/")
|
| 203 |
def home():
|
| 204 |
+
return jsonify(
|
| 205 |
+
{
|
| 206 |
+
"message": "Unified Py-Match Service (FAISS-enabled)",
|
| 207 |
+
"try": [
|
| 208 |
+
"GET /health",
|
| 209 |
+
"GET /api/health",
|
| 210 |
+
"GET /api/_routes",
|
| 211 |
+
"GET /debug/routes",
|
| 212 |
+
"POST /api/signup",
|
| 213 |
+
"POST /api/login",
|
| 214 |
+
"GET /api/questions/marriage",
|
| 215 |
+
"GET /api/questions/existing-profile/marriage/<user_id>",
|
| 216 |
+
"GET /api/expectation-questions",
|
| 217 |
+
"GET /api/existing-preferences/<user_id>",
|
| 218 |
+
"POST /api/questions/submit-answers/<role>",
|
| 219 |
+
"POST /llm/start (body: { user_id, role, n_questions, batch_size })",
|
| 220 |
+
"POST /llm/next (body: { session_id, selected_color })",
|
| 221 |
+
"GET /api/match/<user_id> (query: ?role=<role>&limit=<num>)",
|
| 222 |
+
],
|
| 223 |
+
}
|
| 224 |
+
)
|
| 225 |
|
| 226 |
+
# ------------------------------------------------------------------
|
| 227 |
# Error handlers
|
| 228 |
+
# ------------------------------------------------------------------
|
| 229 |
@app.errorhandler(404)
|
| 230 |
def not_found(error):
|
| 231 |
return jsonify({"error": "Endpoint not found", "path": request.path}), 404
|
| 232 |
|
| 233 |
@app.errorhandler(405)
|
| 234 |
def method_not_allowed(error):
|
| 235 |
+
return (
|
| 236 |
+
jsonify(
|
| 237 |
+
{
|
| 238 |
+
"error": "Method not allowed",
|
| 239 |
+
"message": f"Method {request.method} not allowed for {request.path}",
|
| 240 |
+
"allowed_methods": (
|
| 241 |
+
error.valid_methods if hasattr(error, "valid_methods") else []
|
| 242 |
+
),
|
| 243 |
+
}
|
| 244 |
+
),
|
| 245 |
+
405,
|
| 246 |
+
)
|
| 247 |
|
| 248 |
@app.errorhandler(500)
|
| 249 |
def internal_error(error):
|
|
|
|
| 252 |
return app
|
| 253 |
|
| 254 |
|
|
|
|
|
|
|
| 255 |
if __name__ == "__main__":
|
| 256 |
+
app = create_app()
|
| 257 |
+
|
| 258 |
+
print(f"\n{'=' * 60}")
|
| 259 |
print("π Flask server starting...")
|
| 260 |
+
print(f"{'=' * 60}")
|
| 261 |
+
|
| 262 |
app.run(host="0.0.0.0", port=5000, debug=True)
|
config.py
CHANGED
|
@@ -21,7 +21,7 @@ if IS_HUGGING_FACE:
|
|
| 21 |
DEFAULT_SQL_TRUSTED = "yes" # Use SQL authentication on Hugging Face
|
| 22 |
else:
|
| 23 |
# Local development configuration
|
| 24 |
-
DEFAULT_SQL_SERVER = "
|
| 25 |
DEFAULT_SQL_DB = "Py_Match"
|
| 26 |
DEFAULT_SQL_TRUSTED = "yes" # Use Windows authentication locally
|
| 27 |
|
|
|
|
| 21 |
DEFAULT_SQL_TRUSTED = "yes" # Use SQL authentication on Hugging Face
|
| 22 |
else:
|
| 23 |
# Local development configuration
|
| 24 |
+
DEFAULT_SQL_SERVER = "PYKARA"
|
| 25 |
DEFAULT_SQL_DB = "Py_Match"
|
| 26 |
DEFAULT_SQL_TRUSTED = "yes" # Use Windows authentication locally
|
| 27 |
|
faiss_service.py
CHANGED
|
@@ -4,7 +4,13 @@ import json
|
|
| 4 |
import pickle
|
| 5 |
import random
|
| 6 |
from typing import Dict, List, Tuple, Optional
|
|
|
|
|
|
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# Try importing faiss
|
| 9 |
try:
|
| 10 |
import faiss
|
|
@@ -108,8 +114,104 @@ def try_load_chunks_from_disk(index_path: str) -> List[str]:
|
|
| 108 |
print(f"Failed to load chunks from {c}:", e)
|
| 109 |
return []
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
def load_faiss_index(index_path: str):
|
| 112 |
-
global FAISS_INDEX, TEXT_CHUNKS
|
| 113 |
if not HAS_FAISS:
|
| 114 |
print("FAISS not installed. Skipping index load.")
|
| 115 |
return
|
|
@@ -118,14 +220,19 @@ def load_faiss_index(index_path: str):
|
|
| 118 |
return
|
| 119 |
try:
|
| 120 |
FAISS_INDEX = faiss.read_index(index_path)
|
| 121 |
-
# try to load chunks from companion files
|
| 122 |
TEXT_CHUNKS = try_load_chunks_from_disk(index_path)
|
| 123 |
-
if
|
| 124 |
-
print("
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
except Exception as e:
|
| 127 |
print("Failed to load faiss index:", e)
|
| 128 |
FAISS_INDEX = None
|
|
|
|
|
|
|
| 129 |
|
| 130 |
def get_nearest_context(query_emb: List[float] = None, k: int = 5, query_vector: Optional[List[float]] = None):
|
| 131 |
"""Return concatenated top-k chunks for a query."""
|
|
@@ -157,6 +264,22 @@ def get_faiss_context(k=3):
|
|
| 157 |
|
| 158 |
# Initialize knowledge base only if FAISS is available
|
| 159 |
knowledge = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
if HAS_FAISS:
|
| 161 |
knowledge = KnowledgeSource()
|
| 162 |
else:
|
|
@@ -165,4 +288,7 @@ else:
|
|
| 165 |
class DummyKnowledge:
|
| 166 |
def get_relevant_context(self, *args, **kwargs):
|
| 167 |
return []
|
| 168 |
-
knowledge = DummyKnowledge()
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import pickle
|
| 5 |
import random
|
| 6 |
from typing import Dict, List, Tuple, Optional
|
| 7 |
+
import re
|
| 8 |
+
from collections import defaultdict
|
| 9 |
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 13 |
+
FAISS_INDEX_PATH = os.path.join(BASE_DIR, "faiss_index_file.index") # Direct path
|
| 14 |
# Try importing faiss
|
| 15 |
try:
|
| 16 |
import faiss
|
|
|
|
| 114 |
print(f"Failed to load chunks from {c}:", e)
|
| 115 |
return []
|
| 116 |
|
| 117 |
+
# Add this to faiss_service.py after the imports
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def extract_color_behaviors_from_chunks(chunks: List[str]) -> Dict[str, List[str]]:
|
| 122 |
+
"""
|
| 123 |
+
Extract color-specific behaviors from book chunks
|
| 124 |
+
Returns: {"blue": [behaviors], "green": [behaviors], "red": [behaviors], "yellow": [behaviors]}
|
| 125 |
+
"""
|
| 126 |
+
color_keywords = {
|
| 127 |
+
"blue": [
|
| 128 |
+
"analytical", "fact-based", "data", "research", "analyze", "logic",
|
| 129 |
+
"detail", "thorough", "precision", "evidence", "numbers", "verify",
|
| 130 |
+
"critical", "skeptical", "methodical", "systematic", "investigate",
|
| 131 |
+
"examine", "study", "proof", "accuracy", "rational", "reasoning"
|
| 132 |
+
],
|
| 133 |
+
"green": [
|
| 134 |
+
"organized", "process", "systematic", "routine", "plan", "structure",
|
| 135 |
+
"reliable", "consistent", "predictable", "bureaucratic", "procedural",
|
| 136 |
+
"methodical", "step-by-step", "orderly", "structured", "traditional",
|
| 137 |
+
"stable", "secure", "dependable", "regulated", "formal", "order"
|
| 138 |
+
],
|
| 139 |
+
"red": [
|
| 140 |
+
"decisive", "action-oriented", "direct", "results", "take charge",
|
| 141 |
+
"impulsive", "controlling", "dominant", "assertive", "leadership",
|
| 142 |
+
"quick", "immediate", "practical", "hands-on", "confrontational",
|
| 143 |
+
"bold", "forceful", "authoritative", "dynamic", "active", "energetic"
|
| 144 |
+
],
|
| 145 |
+
"yellow": [
|
| 146 |
+
"creative", "big-picture", "visionary", "innovative", "ideas",
|
| 147 |
+
"unrealistic", "scattered", "enthusiastic", "optimistic", "inspiring",
|
| 148 |
+
"imaginative", "exploratory", "experimental", "spontaneous", "free-thinking",
|
| 149 |
+
"inventive", "original", "artistic", "expressive", "playful", "curious"
|
| 150 |
+
]
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
color_behaviors = defaultdict(list)
|
| 154 |
+
|
| 155 |
+
for chunk in chunks:
|
| 156 |
+
sentences = re.split(r'[.!?]+', chunk)
|
| 157 |
+
for sentence in sentences:
|
| 158 |
+
sentence = sentence.strip()
|
| 159 |
+
if len(sentence.split()) < 5 or len(sentence.split()) > 25:
|
| 160 |
+
continue # Skip too short or too long sentences
|
| 161 |
+
|
| 162 |
+
sentence_lower = sentence.lower()
|
| 163 |
+
|
| 164 |
+
# Find which color this sentence describes
|
| 165 |
+
for color, keywords in color_keywords.items():
|
| 166 |
+
if any(keyword in sentence_lower for keyword in keywords):
|
| 167 |
+
# Clean and format the behavior
|
| 168 |
+
behavior = sentence.strip()
|
| 169 |
+
# Remove quotes if present
|
| 170 |
+
behavior = behavior.replace('"', '').replace("'", "")
|
| 171 |
+
# Capitalize first letter
|
| 172 |
+
if behavior and behavior[0].islower():
|
| 173 |
+
behavior = behavior[0].upper() + behavior[1:]
|
| 174 |
+
|
| 175 |
+
# Avoid duplicates
|
| 176 |
+
if behavior not in color_behaviors[color]:
|
| 177 |
+
color_behaviors[color].append(behavior)
|
| 178 |
+
|
| 179 |
+
break
|
| 180 |
+
|
| 181 |
+
return dict(color_behaviors)
|
| 182 |
+
|
| 183 |
+
def load_color_examples():
|
| 184 |
+
"""
|
| 185 |
+
Load color examples from the book chunks
|
| 186 |
+
"""
|
| 187 |
+
global COLOR_EXAMPLES
|
| 188 |
+
|
| 189 |
+
if not TEXT_CHUNKS:
|
| 190 |
+
print("No text chunks loaded - cannot extract color examples")
|
| 191 |
+
COLOR_EXAMPLES = None
|
| 192 |
+
return
|
| 193 |
+
|
| 194 |
+
try:
|
| 195 |
+
COLOR_EXAMPLES = extract_color_behaviors_from_chunks(TEXT_CHUNKS)
|
| 196 |
+
print(f"Loaded color examples from book: {', '.join([f'{color}: {len(examples)}' for color, examples in COLOR_EXAMPLES.items()])}")
|
| 197 |
+
|
| 198 |
+
# Debug: Show sample behaviors
|
| 199 |
+
print("\n=== SAMPLE COLOR EXAMPLES ===")
|
| 200 |
+
for color, examples in COLOR_EXAMPLES.items():
|
| 201 |
+
print(f"\n{color.upper()} (first 3 examples):")
|
| 202 |
+
for i, example in enumerate(examples[:3], 1):
|
| 203 |
+
print(f" {i}. {example}")
|
| 204 |
+
print("=============================\n")
|
| 205 |
+
except Exception as e:
|
| 206 |
+
print(f"Failed to extract color examples: {e}")
|
| 207 |
+
COLOR_EXAMPLES = None
|
| 208 |
+
|
| 209 |
+
# Initialize COLOR_EXAMPLES
|
| 210 |
+
COLOR_EXAMPLES = None
|
| 211 |
+
|
| 212 |
+
# Update load_faiss_index to also load color examples
|
| 213 |
def load_faiss_index(index_path: str):
|
| 214 |
+
global FAISS_INDEX, TEXT_CHUNKS, COLOR_EXAMPLES
|
| 215 |
if not HAS_FAISS:
|
| 216 |
print("FAISS not installed. Skipping index load.")
|
| 217 |
return
|
|
|
|
| 220 |
return
|
| 221 |
try:
|
| 222 |
FAISS_INDEX = faiss.read_index(index_path)
|
|
|
|
| 223 |
TEXT_CHUNKS = try_load_chunks_from_disk(index_path)
|
| 224 |
+
if TEXT_CHUNKS:
|
| 225 |
+
print(f"Loaded {len(TEXT_CHUNKS)} text chunks")
|
| 226 |
+
# Extract color examples from chunks
|
| 227 |
+
load_color_examples()
|
| 228 |
+
else:
|
| 229 |
+
print("Warning: No text chunks found.")
|
| 230 |
+
COLOR_EXAMPLES = None
|
| 231 |
except Exception as e:
|
| 232 |
print("Failed to load faiss index:", e)
|
| 233 |
FAISS_INDEX = None
|
| 234 |
+
COLOR_EXAMPLES = None
|
| 235 |
+
|
| 236 |
|
| 237 |
def get_nearest_context(query_emb: List[float] = None, k: int = 5, query_vector: Optional[List[float]] = None):
|
| 238 |
"""Return concatenated top-k chunks for a query."""
|
|
|
|
| 264 |
|
| 265 |
# Initialize knowledge base only if FAISS is available
|
| 266 |
knowledge = None
|
| 267 |
+
|
| 268 |
+
# --- Auto-load main FAISS index on import ---
|
| 269 |
+
if HAS_FAISS:
|
| 270 |
+
try:
|
| 271 |
+
if os.path.exists(FAISS_INDEX_PATH):
|
| 272 |
+
print(f"[faiss_service] Loading FAISS index from: {FAISS_INDEX_PATH}")
|
| 273 |
+
load_faiss_index(FAISS_INDEX_PATH)
|
| 274 |
+
else:
|
| 275 |
+
print(f"[faiss_service] FAISS index file NOT found at: {FAISS_INDEX_PATH}")
|
| 276 |
+
except Exception as e:
|
| 277 |
+
print(f"[faiss_service] Error while loading FAISS index: {e}")
|
| 278 |
+
else:
|
| 279 |
+
print("[faiss_service] FAISS not installed, index will not be loaded.")
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
|
| 283 |
if HAS_FAISS:
|
| 284 |
knowledge = KnowledgeSource()
|
| 285 |
else:
|
|
|
|
| 288 |
class DummyKnowledge:
|
| 289 |
def get_relevant_context(self, *args, **kwargs):
|
| 290 |
return []
|
| 291 |
+
knowledge = DummyKnowledge()
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
|
llm_service.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# llm_service.py
|
| 2 |
import pyodbc
|
| 3 |
import os
|
| 4 |
import json
|
|
@@ -22,16 +21,21 @@ except Exception:
|
|
| 22 |
HAS_LLM_STACK = False
|
| 23 |
HAS_LLM = False
|
| 24 |
|
|
|
|
| 25 |
class Option(BaseModel):
|
| 26 |
text: str
|
| 27 |
color: str
|
| 28 |
|
|
|
|
| 29 |
class QAItem(BaseModel):
|
| 30 |
question: str
|
| 31 |
options: List[Option] = Field(min_items=4, max_items=4)
|
| 32 |
|
|
|
|
| 33 |
class BatchQA(BaseModel):
|
| 34 |
items: List[QAItem] = Field(..., min_items=1)
|
|
|
|
|
|
|
| 35 |
SYSTEM_PROMPT = (
|
| 36 |
"You write marriage compatibility assessment questions that reveal four personality colors through forced choices:\n"
|
| 37 |
"- blue=analytical, fact-based (positive: thorough, precise | negative: overly critical, data-obsessed)\n"
|
|
@@ -65,8 +69,7 @@ SYSTEM_PROMPT = (
|
|
| 65 |
|
| 66 |
USER_PROMPT_BATCH = (
|
| 67 |
"Context (from Surrounded by Idiots or other corpus):\n{context}\n\n"
|
| 68 |
-
"Question Type: {question_type}\n\n"
|
| 69 |
-
|
| 70 |
"User Profile (Current Background):\n"
|
| 71 |
"- Education: {education}\n"
|
| 72 |
"- Employment: {employment}\n"
|
|
@@ -89,12 +92,10 @@ USER_PROMPT_BATCH = (
|
|
| 89 |
"Themes (array of short strings): {themes_json}\n"
|
| 90 |
"Previously asked questions: {previous_questions}\n\n"
|
| 91 |
"{format_instructions}\n\n"
|
| 92 |
-
|
| 93 |
"Generate {question_type} questions:\n"
|
| 94 |
"- If QUESTION TYPE is 'profile': Generate 5 questions using ONLY profile data (education, employment, hobbies, family background, current lifestyle)\n"
|
| 95 |
"- If QUESTION TYPE is 'expectation': Generate 5 questions using ONLY expectation data (conflict style, financial preferences, values, deal breakers)\n"
|
| 96 |
"- If QUESTION TYPE is 'character': Generate 10 questions about CURRENT behavior in various life situations\n\n"
|
| 97 |
-
|
| 98 |
"CRITICAL RULES:\n"
|
| 99 |
"1) DO NOT use prefixes like 'Based on your profile' or 'Considering your expectations'\n"
|
| 100 |
"2) Questions should be natural and flow conversationally\n"
|
|
@@ -113,7 +114,7 @@ CHAIN_BATCH = None
|
|
| 113 |
if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
|
| 114 |
try:
|
| 115 |
PARSER_BATCH = PydanticOutputParser(pydantic_object=BatchQA)
|
| 116 |
-
|
| 117 |
def build_batch_chain():
|
| 118 |
llm = ChatOpenAI(
|
| 119 |
model="gpt-4o-mini",
|
|
@@ -122,10 +123,12 @@ if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
|
|
| 122 |
timeout=30,
|
| 123 |
model_kwargs={"response_format": {"type": "json_object"}},
|
| 124 |
)
|
| 125 |
-
prompt = ChatPromptTemplate.from_messages(
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
| 129 |
return prompt | llm | PARSER_BATCH
|
| 130 |
|
| 131 |
CHAIN_BATCH = build_batch_chain()
|
|
@@ -133,50 +136,88 @@ if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
|
|
| 133 |
print("Failed to build CHAIN_BATCH:", e)
|
| 134 |
CHAIN_BATCH = None
|
| 135 |
|
|
|
|
| 136 |
def ensure_valid_colors(options: List[Dict]) -> List[Dict]:
|
| 137 |
seen, fixed = set(), []
|
| 138 |
defaults = {
|
| 139 |
-
"blue": "
|
| 140 |
-
"green": "
|
| 141 |
-
"red": "
|
| 142 |
-
"yellow": "
|
| 143 |
}
|
| 144 |
for o in options:
|
| 145 |
c = str(o.get("color", "")).lower()
|
| 146 |
t = str(o.get("text", "")).strip()
|
| 147 |
if c in COLOR_KEYS and c not in seen and t:
|
| 148 |
-
seen.add(c)
|
|
|
|
| 149 |
for c in COLOR_KEYS:
|
| 150 |
if c not in seen:
|
| 151 |
fixed.append({"text": defaults[c], "color": c})
|
| 152 |
return fixed[:4]
|
| 153 |
|
|
|
|
| 154 |
def summarize_profile(profile: Dict) -> Dict:
|
| 155 |
"""Extract all non-PII columns from Marriage table for LLM context"""
|
| 156 |
out: Dict = {}
|
| 157 |
-
|
| 158 |
# All columns from Marriage table (excluding PII where possible)
|
| 159 |
marriage_columns = [
|
| 160 |
-
"user_id",
|
| 161 |
-
"
|
| 162 |
-
"
|
| 163 |
-
"
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
]
|
| 172 |
-
|
| 173 |
for col in marriage_columns:
|
| 174 |
v = profile.get(col)
|
| 175 |
if v not in (None, "", []):
|
| 176 |
out[col] = v
|
| 177 |
-
|
| 178 |
return out
|
| 179 |
|
|
|
|
| 180 |
def offline_generate_batch(themes: List[str], state: Dict, context: str = "") -> List[Dict]:
|
| 181 |
prof = state.get("profile", {}) or {}
|
| 182 |
name = prof.get("full_name") or "Partner"
|
|
@@ -206,7 +247,7 @@ def offline_generate_batch(themes: List[str], state: Dict, context: str = "") ->
|
|
| 206 |
|
| 207 |
# incorporate small bit from context if available (first 120 chars)
|
| 208 |
if context:
|
| 209 |
-
ctx_snip = context.replace(
|
| 210 |
q = f"{q} (Note: {ctx_snip})"
|
| 211 |
|
| 212 |
# Keep concise
|
|
@@ -214,161 +255,191 @@ def offline_generate_batch(themes: List[str], state: Dict, context: str = "") ->
|
|
| 214 |
q = " ".join(q.split()[:20])
|
| 215 |
|
| 216 |
opts = [
|
| 217 |
-
{"text": "Check
|
| 218 |
-
{"text": "
|
| 219 |
-
{"text": "
|
| 220 |
-
{"text": "
|
| 221 |
]
|
| 222 |
random.shuffle(opts)
|
| 223 |
items.append({"question": q, "options": opts, "source": "fallback"})
|
| 224 |
return items
|
| 225 |
|
| 226 |
|
| 227 |
-
def generate_category_specific_options(
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
| 231 |
question_lower = question_text.lower()
|
| 232 |
-
|
| 233 |
# Extract key themes from question for better contextualization
|
| 234 |
themes_in_question = []
|
| 235 |
-
for theme in [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
if theme in question_lower:
|
| 237 |
themes_in_question.append(theme)
|
| 238 |
-
|
| 239 |
-
# Default option templates for each color
|
| 240 |
base_options = {
|
| 241 |
"blue": {
|
| 242 |
-
"profile": "
|
| 243 |
-
"expectation": "
|
| 244 |
-
"character": "
|
| 245 |
},
|
| 246 |
"green": {
|
| 247 |
-
"profile": "Follow a
|
| 248 |
-
"expectation": "
|
| 249 |
-
"character": "
|
| 250 |
},
|
| 251 |
"red": {
|
| 252 |
-
"profile": "
|
| 253 |
-
"expectation": "Take charge and
|
| 254 |
-
"character": "Act
|
| 255 |
},
|
| 256 |
"yellow": {
|
| 257 |
-
"profile": "
|
| 258 |
-
"expectation": "
|
| 259 |
-
"character": "
|
| 260 |
-
}
|
| 261 |
}
|
| 262 |
-
|
| 263 |
-
# Contextual variations based on question themes
|
| 264 |
contextual_variations = {
|
| 265 |
"learning": {
|
| 266 |
-
"blue": "Study
|
| 267 |
-
"green": "Follow the
|
| 268 |
-
"red": "
|
| 269 |
-
"yellow": "
|
| 270 |
},
|
| 271 |
"problem": {
|
| 272 |
-
"blue": "
|
| 273 |
-
"green": "
|
| 274 |
-
"red": "
|
| 275 |
-
"yellow": "Find
|
| 276 |
},
|
| 277 |
"conflict": {
|
| 278 |
-
"blue": "
|
| 279 |
-
"green": "
|
| 280 |
-
"red": "
|
| 281 |
-
"yellow": "Find
|
| 282 |
},
|
| 283 |
"money": {
|
| 284 |
-
"blue": "
|
| 285 |
-
"green": "
|
| 286 |
-
"red": "Make
|
| 287 |
-
"yellow": "
|
| 288 |
},
|
| 289 |
"family": {
|
| 290 |
-
"blue": "
|
| 291 |
-
"green": "
|
| 292 |
-
"red": "Take
|
| 293 |
-
"yellow": "
|
| 294 |
-
}
|
| 295 |
}
|
| 296 |
-
|
| 297 |
# Start with base options for the category
|
| 298 |
options = []
|
| 299 |
for color in COLOR_KEYS:
|
| 300 |
base_text = base_options[color][question_type]
|
| 301 |
-
|
| 302 |
# Add contextual variation if theme matches
|
| 303 |
for theme, variations in contextual_variations.items():
|
| 304 |
if theme in themes_in_question:
|
| 305 |
base_text = variations[color]
|
| 306 |
break
|
| 307 |
-
|
| 308 |
-
# Add shadow/negative aspects for realism
|
| 309 |
shadow_aspects = {
|
| 310 |
"blue": {
|
| 311 |
-
"profile": " (but can
|
| 312 |
-
"expectation": " (but
|
| 313 |
-
"character": " (but can be
|
| 314 |
},
|
| 315 |
"green": {
|
| 316 |
-
"profile": " (but can be too
|
| 317 |
-
"expectation": " (but
|
| 318 |
-
"character": " (but can
|
| 319 |
},
|
| 320 |
"red": {
|
| 321 |
-
"profile": " (but can
|
| 322 |
-
"expectation": " (but
|
| 323 |
-
"character": " (but can
|
| 324 |
},
|
| 325 |
"yellow": {
|
| 326 |
-
"profile": " (but can
|
| 327 |
-
"expectation": " (but
|
| 328 |
-
"character": " (but can be
|
| 329 |
-
}
|
| 330 |
}
|
| 331 |
-
|
| 332 |
# Only add shadow aspects occasionally (30% chance) for variety
|
| 333 |
if random.random() < 0.3:
|
| 334 |
shadow = shadow_aspects[color][question_type]
|
| 335 |
-
# Ensure we
|
| 336 |
if len(base_text.split()) + len(shadow.split()) <= 15:
|
| 337 |
base_text += shadow
|
| 338 |
-
|
| 339 |
-
options.append({
|
| 340 |
-
"text": base_text[:80], # Limit length
|
| 341 |
-
"color": color
|
| 342 |
-
})
|
| 343 |
-
|
| 344 |
-
return options
|
| 345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
|
| 350 |
|
| 351 |
-
def generate_batch_questions(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
# Extract ALL data from Marriage table
|
| 353 |
profile = state.get("profile", {})
|
| 354 |
user_id = profile.get("user_id")
|
| 355 |
-
|
| 356 |
try:
|
| 357 |
from database import fetch_expectation_data
|
|
|
|
| 358 |
expectation_data = fetch_expectation_data(user_id) if user_id else {}
|
| 359 |
except ImportError:
|
| 360 |
expectation_data = {}
|
| 361 |
-
|
| 362 |
# Extract ALL profile data from Marriage table
|
| 363 |
profile_data = {
|
| 364 |
# Personal Information
|
| 365 |
"full_name": profile.get("full_name", "Not specified"),
|
| 366 |
-
|
| 367 |
"gender": profile.get("gender", "Not specified"),
|
| 368 |
"current_city": profile.get("current_city", "Not specified"),
|
| 369 |
"country": profile.get("country", "Not specified"),
|
| 370 |
"marital_status": profile.get("marital_status", "Not specified"),
|
| 371 |
-
|
| 372 |
# Education & Career
|
| 373 |
"education_level": profile.get("education_level", "Not specified"),
|
| 374 |
"employment_status": profile.get("employment_status", "Not specified"),
|
|
@@ -377,23 +448,24 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
|
|
| 377 |
"career_aspirations": profile.get("career_aspirations", "Not specified"),
|
| 378 |
"field_of_study": profile.get("field_of_study", "Not specified"),
|
| 379 |
"income_range": profile.get("income_range", "Not specified"),
|
| 380 |
-
|
| 381 |
# Family & Background
|
| 382 |
"number_of_siblings": profile.get("number_of_siblings", "Not specified"),
|
| 383 |
"family_type": profile.get("family_type", "Not specified"),
|
| 384 |
"siblings_position": profile.get("siblings_position", "Not specified"),
|
| 385 |
"parents_living_status": profile.get("parents_living_status", "Not specified"),
|
| 386 |
"live_with_parents": profile.get("live_with_parents", "Not specified"),
|
| 387 |
-
"support_parents_financially": profile.get(
|
| 388 |
-
|
| 389 |
-
|
|
|
|
|
|
|
|
|
|
| 390 |
# Physical & Health
|
| 391 |
"height": profile.get("height", "Not specified"),
|
| 392 |
"skin_tone": profile.get("skin_tone", "Not specified"),
|
| 393 |
"blood_group": profile.get("blood_group", "Not specified"),
|
| 394 |
"health_constraints": profile.get("health_constraints", "Not specified"),
|
| 395 |
"fitness_level": profile.get("fitness_level", "Not specified"),
|
| 396 |
-
|
| 397 |
# Lifestyle & Habits
|
| 398 |
"hobbies_interests": str(profile.get("hobbies_interests", "Not specified")),
|
| 399 |
"conflict_approach": profile.get("conflict_approach", "Not specified"),
|
|
@@ -405,26 +477,22 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
|
|
| 405 |
"own_pets": profile.get("own_pets", "Not specified"),
|
| 406 |
"travel_preference": profile.get("travel_preference", "Not specified"),
|
| 407 |
"relaxation_mode": profile.get("relaxation_mode", "Not specified"),
|
| 408 |
-
|
| 409 |
# Languages & Relocation
|
| 410 |
"languages_spoken": profile.get("languages_spoken", "Not specified"),
|
| 411 |
"relocation_willingness": profile.get("relocation_willingness", "Not specified"),
|
| 412 |
-
|
| 413 |
# Religion & Citizenship
|
| 414 |
"religion": profile.get("religion", "Not specified"),
|
| 415 |
"dual_citizenship": profile.get("dual_citizenship", "Not specified"),
|
| 416 |
-
|
| 417 |
# Relationship Preferences
|
| 418 |
"children_timeline": profile.get("children_timeline", "Not specified"),
|
| 419 |
"open_to_adoption": profile.get("open_to_adoption", "Not specified"),
|
| 420 |
"deal_breakers": profile.get("deal_breakers", "Not specified"),
|
| 421 |
"other_non_negotiables": profile.get("other_non_negotiables", "Not specified"),
|
| 422 |
"live_with_inlaws": profile.get("live_with_inlaws", "Not specified"),
|
| 423 |
-
|
| 424 |
# Additional Info
|
| 425 |
"remark": profile.get("remark", "Not specified"),
|
| 426 |
}
|
| 427 |
-
|
| 428 |
# Extract ALL expectation data from ExpectationResponse table
|
| 429 |
expectation_data_dict = {
|
| 430 |
# Basic Preferences
|
|
@@ -433,75 +501,108 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
|
|
| 433 |
"pref_current_city": expectation_data.get("pref_current_city", "Not specified"),
|
| 434 |
"pref_countries": expectation_data.get("pref_countries", "Not specified"),
|
| 435 |
"pref_languages": expectation_data.get("pref_languages", "Not specified"),
|
| 436 |
-
"pref_education_level": expectation_data.get(
|
| 437 |
-
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
| 439 |
# Health & Lifestyle
|
| 440 |
"health_constraints": expectation_data.get("health_constraints", "Not specified"),
|
| 441 |
"pref_diet": expectation_data.get("pref_diet", "Not specified"),
|
| 442 |
"accept_smoking": expectation_data.get("accept_smoking", "Not specified"),
|
| 443 |
"accept_alcohol": expectation_data.get("accept_alcohol", "Not specified"),
|
| 444 |
"pref_fitness": expectation_data.get("pref_fitness", "Not specified"),
|
| 445 |
-
|
| 446 |
# Family & Living
|
| 447 |
"pref_family_type": expectation_data.get("pref_family_type", "Not specified"),
|
| 448 |
"live_with_inlaws": expectation_data.get("live_with_inlaws", "Not specified"),
|
| 449 |
"children_timeline": expectation_data.get("children_timeline", "Not specified"),
|
| 450 |
"open_to_adoption": expectation_data.get("open_to_adoption", "Not specified"),
|
| 451 |
-
"pref_live_with_parents": expectation_data.get(
|
| 452 |
-
|
| 453 |
-
|
|
|
|
|
|
|
|
|
|
| 454 |
# Conflict & Finance
|
| 455 |
-
"pref_conflict_approach": expectation_data.get(
|
| 456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
"pref_income_range": expectation_data.get("pref_income_range", "Not specified"),
|
| 458 |
-
|
| 459 |
# Values & Compatibility
|
| 460 |
-
"religion_alignment": expectation_data.get(
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
"travel_pref": expectation_data.get("travel_pref", "Not specified"),
|
| 463 |
"pet_pref": expectation_data.get("pet_pref", "Not specified"),
|
| 464 |
-
|
| 465 |
# Career & Relocation
|
| 466 |
-
"pref_partner_relocation": expectation_data.get(
|
| 467 |
-
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
| 469 |
# Additional Preferences
|
| 470 |
"marital_status": expectation_data.get("marital_status", "Not specified"),
|
| 471 |
"skin_tone": expectation_data.get("skin_tone", "Not specified"),
|
| 472 |
"daily_routine": expectation_data.get("daily_routine", "Not specified"),
|
| 473 |
-
"family_communication_frequency": expectation_data.get(
|
|
|
|
|
|
|
| 474 |
"relaxation_mode": expectation_data.get("relaxation_mode", "Not specified"),
|
| 475 |
-
|
| 476 |
# Non-negotiables
|
| 477 |
"deal_breakers": expectation_data.get("deal_breakers", "Not specified"),
|
| 478 |
-
"other_non_negotiables": expectation_data.get(
|
| 479 |
-
|
|
|
|
| 480 |
# Summary
|
| 481 |
"expectation_summary": expectation_data.get("expectation_summary", "Not specified"),
|
| 482 |
"_mandatory_fields": expectation_data.get("_mandatory_fields", "Not specified"),
|
| 483 |
}
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
if CHAIN_BATCH is not None and PARSER_BATCH is not None:
|
| 486 |
try:
|
| 487 |
items: List[Dict] = []
|
| 488 |
-
|
| 489 |
# 1. PROFILE-BASED QUESTIONS (5 questions) - Using ALL Marriage table columns
|
| 490 |
profile_prompt = {
|
| 491 |
"state": json.dumps(state, ensure_ascii=False),
|
| 492 |
"themes_json": json.dumps(["profile"] * 5, ensure_ascii=False),
|
| 493 |
-
"previous_questions": json.dumps(
|
|
|
|
|
|
|
| 494 |
"format_instructions": PARSER_BATCH.get_format_instructions(),
|
| 495 |
"context": "Generate 5 PROFILE-BASED questions using ALL available user background information.",
|
| 496 |
"question_type": "profile",
|
| 497 |
-
|
| 498 |
# Use ALL profile data
|
| 499 |
"education": f"{profile_data['education_level']} | {profile_data['field_of_study']}",
|
| 500 |
"employment": f"{profile_data['employment_status']} | {profile_data['job_role']} ({profile_data['work_experience_years']} years)",
|
| 501 |
-
"hobbies": profile_data[
|
| 502 |
"family_type": f"{profile_data['family_type']} | Siblings: {profile_data['number_of_siblings']} | Position: {profile_data['siblings_position']}",
|
| 503 |
-
"current_lifestyle":
|
| 504 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
# Expectation data marked as irrelevant
|
| 506 |
"conflict_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
| 507 |
"financial_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
|
@@ -514,127 +615,211 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
|
|
| 514 |
"ambition_pref": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
| 515 |
"deal_breakers": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
| 516 |
}
|
| 517 |
-
|
| 518 |
result = CHAIN_BATCH.invoke(profile_prompt)
|
| 519 |
profile_items = get_items_from_result(result)
|
| 520 |
-
|
| 521 |
for qa in profile_items[:5]:
|
| 522 |
out = qa.dict() if hasattr(qa, "dict") else dict(qa)
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
)
|
| 529 |
-
out["source"] = "llm_profile"
|
| 530 |
out["question_type"] = "profile"
|
| 531 |
random.shuffle(out["options"])
|
| 532 |
items.append(out)
|
| 533 |
-
|
| 534 |
# 2. EXPECTATION-BASED QUESTIONS (5 questions) - Using ALL ExpectationResponse columns
|
| 535 |
expectation_prompt = {
|
| 536 |
"state": json.dumps(state, ensure_ascii=False),
|
| 537 |
"themes_json": json.dumps(["expectation"] * 5, ensure_ascii=False),
|
| 538 |
-
"previous_questions": json.dumps(
|
|
|
|
|
|
|
|
|
|
| 539 |
"format_instructions": PARSER_BATCH.get_format_instructions(),
|
| 540 |
"context": "Generate 5 EXPECTATION-BASED questions using ALL relationship preferences and expectations.",
|
| 541 |
"question_type": "expectation",
|
| 542 |
-
|
| 543 |
# Minimal profile context
|
| 544 |
"education": "Background context only",
|
| 545 |
"employment": "Background context only",
|
| 546 |
"hobbies": "Background context only",
|
| 547 |
"family_type": "Background context only",
|
| 548 |
"current_lifestyle": "General context",
|
| 549 |
-
|
| 550 |
# Use ALL expectation data
|
| 551 |
"conflict_style": f"{expectation_data_dict['pref_conflict_approach']}",
|
| 552 |
-
"financial_style":
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
"
|
| 557 |
-
"
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
}
|
| 562 |
-
|
| 563 |
result = CHAIN_BATCH.invoke(expectation_prompt)
|
| 564 |
expectation_items = get_items_from_result(result)
|
| 565 |
-
|
| 566 |
for qa in expectation_items[:5]:
|
| 567 |
out = qa.dict() if hasattr(qa, "dict") else dict(qa)
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
)
|
| 574 |
-
out["source"] = "llm_expectation"
|
| 575 |
out["question_type"] = "expectation"
|
| 576 |
random.shuffle(out["options"])
|
| 577 |
items.append(out)
|
| 578 |
-
|
| 579 |
# 3. CHARACTER-BASED QUESTIONS (10 questions) - Using data from BOTH tables
|
| 580 |
character_prompt = {
|
| 581 |
"state": json.dumps(state, ensure_ascii=False),
|
| 582 |
"themes_json": json.dumps(themes[:10], ensure_ascii=False),
|
| 583 |
-
"previous_questions": json.dumps(
|
|
|
|
|
|
|
|
|
|
| 584 |
"format_instructions": PARSER_BATCH.get_format_instructions(),
|
| 585 |
-
"context": context[:2000]
|
|
|
|
| 586 |
"question_type": "character",
|
| 587 |
-
|
| 588 |
# All data from Marriage table
|
| 589 |
-
"education": profile_data[
|
| 590 |
-
"employment": profile_data[
|
| 591 |
-
"hobbies": profile_data[
|
| 592 |
-
"family_type": profile_data[
|
| 593 |
-
"current_lifestyle":
|
| 594 |
-
|
|
|
|
|
|
|
| 595 |
# All data from ExpectationResponse table
|
| 596 |
-
"conflict_style": expectation_data_dict[
|
| 597 |
-
"financial_style": expectation_data_dict[
|
| 598 |
-
"income_range": expectation_data_dict[
|
| 599 |
-
"relocation_willingness": expectation_data_dict[
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
"
|
| 603 |
-
"
|
| 604 |
-
"
|
| 605 |
-
"
|
|
|
|
|
|
|
| 606 |
}
|
| 607 |
-
|
| 608 |
result = CHAIN_BATCH.invoke(character_prompt)
|
| 609 |
character_items = get_items_from_result(result)
|
| 610 |
-
|
| 611 |
for qa in character_items[:10]:
|
| 612 |
out = qa.dict() if hasattr(qa, "dict") else dict(qa)
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 618 |
)
|
| 619 |
-
out["source"] = "llm_character"
|
| 620 |
out["question_type"] = "character"
|
| 621 |
random.shuffle(out["options"])
|
| 622 |
items.append(out)
|
| 623 |
-
|
| 624 |
# Verify we have exactly 20 questions
|
| 625 |
if len(items) == 20:
|
| 626 |
return items[:20]
|
| 627 |
else:
|
| 628 |
-
# If LLM
|
| 629 |
-
return fill_missing_questions(
|
| 630 |
-
|
|
|
|
|
|
|
| 631 |
except Exception as e:
|
| 632 |
print("LLM batch generation failed:", e)
|
| 633 |
-
return generate_fallback_with_distribution(
|
|
|
|
|
|
|
| 634 |
else:
|
| 635 |
-
return generate_fallback_with_distribution(
|
| 636 |
-
|
| 637 |
-
|
| 638 |
|
| 639 |
|
| 640 |
def get_items_from_result(result):
|
|
@@ -646,92 +831,125 @@ def get_items_from_result(result):
|
|
| 646 |
else:
|
| 647 |
return []
|
| 648 |
|
| 649 |
-
|
| 650 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
"""Fill missing questions to reach 20 total"""
|
| 652 |
items = current_items.copy()
|
| 653 |
-
|
| 654 |
# Count current distribution
|
| 655 |
profile_count = sum(1 for q in items if q.get("question_type") == "profile")
|
| 656 |
expectation_count = sum(1 for q in items if q.get("question_type") == "expectation")
|
| 657 |
character_count = sum(1 for q in items if q.get("question_type") == "character")
|
| 658 |
-
|
| 659 |
# Fill profile questions if needed
|
| 660 |
while profile_count < 5:
|
| 661 |
profile_q = generate_profile_question(state, profile_data)
|
| 662 |
items.append(profile_q)
|
| 663 |
profile_count += 1
|
| 664 |
-
|
| 665 |
# Fill expectation questions if needed
|
| 666 |
while expectation_count < 5:
|
| 667 |
expectation_q = generate_expectation_question(state, expectation_data)
|
| 668 |
items.append(expectation_q)
|
| 669 |
expectation_count += 1
|
| 670 |
-
|
| 671 |
# Fill character questions if needed
|
| 672 |
while character_count < 10:
|
| 673 |
theme = themes[character_count % len(themes)] if themes else "daily situation"
|
| 674 |
character_q = generate_character_question(theme, state)
|
| 675 |
items.append(character_q)
|
| 676 |
character_count += 1
|
| 677 |
-
|
| 678 |
return items[:20]
|
| 679 |
|
|
|
|
| 680 |
def generate_profile_question(state: Dict, profile_data: Dict) -> Dict:
|
| 681 |
"""Generate a single profile question"""
|
| 682 |
prof = state.get("profile", {})
|
| 683 |
name = prof.get("full_name") or "Partner"
|
| 684 |
-
|
| 685 |
profile_topics = [
|
| 686 |
-
(
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
]
|
| 692 |
-
|
| 693 |
-
topic_idx = len(
|
|
|
|
|
|
|
| 694 |
if topic_idx >= len(profile_topics):
|
| 695 |
topic_idx = 0
|
| 696 |
-
|
| 697 |
topic, question = profile_topics[topic_idx]
|
| 698 |
-
|
| 699 |
# Generate contextual options
|
| 700 |
options = generate_category_specific_options("profile", question, profile_data, None)
|
| 701 |
random.shuffle(options)
|
| 702 |
-
|
| 703 |
return {
|
| 704 |
"question": question,
|
| 705 |
"options": options,
|
| 706 |
"source": "fallback_profile",
|
| 707 |
-
"question_type": "profile"
|
| 708 |
}
|
| 709 |
|
|
|
|
| 710 |
def generate_expectation_question(state: Dict, expectation_data: Dict) -> Dict:
|
| 711 |
"""Generate a single expectation question"""
|
| 712 |
expectation_topics = [
|
| 713 |
-
(
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
("
|
|
|
|
|
|
|
|
|
|
| 718 |
]
|
| 719 |
-
|
| 720 |
-
topic_idx = len(
|
|
|
|
|
|
|
| 721 |
if topic_idx >= len(expectation_topics):
|
| 722 |
topic_idx = 0
|
| 723 |
-
|
| 724 |
topic, question = expectation_topics[topic_idx]
|
| 725 |
-
|
| 726 |
# Generate contextual options
|
| 727 |
-
options = generate_category_specific_options(
|
|
|
|
|
|
|
| 728 |
random.shuffle(options)
|
| 729 |
-
|
| 730 |
return {
|
| 731 |
"question": question,
|
| 732 |
"options": options,
|
| 733 |
"source": "fallback_expectation",
|
| 734 |
-
"question_type": "expectation"
|
| 735 |
}
|
| 736 |
|
| 737 |
|
|
@@ -739,108 +957,127 @@ def generate_character_question(theme: str, state: Dict) -> Dict:
|
|
| 739 |
"""Generate a single character question"""
|
| 740 |
prof = state.get("profile", {})
|
| 741 |
name = prof.get("full_name") or "Partner"
|
| 742 |
-
|
| 743 |
short_theme = theme.split(" around ")[-1].strip()[:50]
|
| 744 |
-
question = f"When {short_theme}, what
|
| 745 |
-
|
| 746 |
# Generate contextual options
|
| 747 |
options = generate_category_specific_options("character", question, None, None)
|
| 748 |
random.shuffle(options)
|
| 749 |
-
|
| 750 |
return {
|
| 751 |
"question": question,
|
| 752 |
"options": options,
|
| 753 |
"source": "fallback_character",
|
| 754 |
-
"question_type": "character"
|
| 755 |
}
|
| 756 |
|
| 757 |
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
"""
|
| 762 |
-
Fallback generator that enforces the 5-5-10 distribution
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
"""
|
| 764 |
-
items = []
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
("family_type", f"Growing up in a {profile_data['family_type']} family, what communication patterns feel most natural to you?"),
|
| 774 |
-
("current_city", f"Living in {profile_data['current_city']}, how do you adapt to your daily environment?")
|
| 775 |
]
|
| 776 |
-
|
| 777 |
-
for
|
| 778 |
opts = [
|
| 779 |
-
{"text": "
|
| 780 |
-
{"text": "
|
| 781 |
-
{"text": "
|
| 782 |
-
{"text": "
|
| 783 |
]
|
| 784 |
random.shuffle(opts)
|
| 785 |
-
items.append(
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
|
|
|
|
|
|
| 799 |
]
|
| 800 |
-
|
| 801 |
-
for
|
| 802 |
opts = [
|
| 803 |
-
{"text": "
|
| 804 |
-
{"text": "
|
| 805 |
-
{"text": "
|
| 806 |
-
{"text": "
|
| 807 |
]
|
| 808 |
random.shuffle(opts)
|
| 809 |
-
items.append(
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
|
|
|
|
|
|
| 816 |
# 3. Generate 10 CHARACTER-BASED questions from themes
|
| 817 |
-
for i, theme in enumerate(themes[:10]
|
| 818 |
-
short = theme.split(" around ")[-1].strip()
|
| 819 |
question = f"When dealing with {short}, what is your typical approach?"
|
| 820 |
-
|
| 821 |
opts = [
|
| 822 |
-
{"text": "
|
| 823 |
-
{"text": "
|
| 824 |
-
{"text": "Take charge and
|
| 825 |
-
{"text": "
|
| 826 |
]
|
| 827 |
random.shuffle(opts)
|
| 828 |
-
items.append(
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
|
|
|
|
|
|
| 835 |
# Ensure we have exactly 20 questions
|
| 836 |
return items[:20]
|
| 837 |
|
| 838 |
|
| 839 |
class SessionState:
|
| 840 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 841 |
domain = (domain or role or "general").lower()
|
| 842 |
self.domain = domain if domain in DOMAINS else "general"
|
| 843 |
-
self.role =
|
| 844 |
self.profile = profile or {}
|
| 845 |
self.n_questions = max(1, min(n_questions, MAX_QUESTIONS))
|
| 846 |
self.batch_size = max(1, batch_size)
|
|
@@ -850,11 +1087,14 @@ class SessionState:
|
|
| 850 |
self.queue: List[Dict] = []
|
| 851 |
self.finished = False
|
| 852 |
self.used_topics: List[str] = []
|
| 853 |
-
|
|
|
|
| 854 |
|
| 855 |
def to_min_state(self) -> Dict:
|
| 856 |
total = sum(self.color_counts.values()) or 1
|
| 857 |
-
mix_percentages = {
|
|
|
|
|
|
|
| 858 |
dominant = max(self.color_counts, key=self.color_counts.get) if total else None
|
| 859 |
return {
|
| 860 |
"asked": self.asked,
|
|
@@ -868,10 +1108,12 @@ class SessionState:
|
|
| 868 |
def remaining(self) -> int:
|
| 869 |
return self.n_questions - self.asked
|
| 870 |
|
|
|
|
| 871 |
SESSIONS_FILE = os.getenv("PYMATCH_SESSIONS_FILE", "sessions.json")
|
| 872 |
_sessions_lock = threading.Lock()
|
| 873 |
SESSIONS: Dict[str, SessionState] = {}
|
| 874 |
|
|
|
|
| 875 |
def save_sessions():
|
| 876 |
try:
|
| 877 |
with _sessions_lock:
|
|
@@ -883,34 +1125,56 @@ def save_sessions():
|
|
| 883 |
except Exception as e:
|
| 884 |
print("Failed to save sessions:", e)
|
| 885 |
|
|
|
|
| 886 |
def persist_final_progress(user_id: Optional[str], role: str, mix: Dict[str, float]) -> bool:
|
| 887 |
from database import get_db_connection
|
| 888 |
from config import PROGRESS_TBL
|
| 889 |
-
|
| 890 |
llm_id = str(uuid.uuid4())
|
| 891 |
-
blue
|
| 892 |
-
green
|
| 893 |
yellow = float(mix.get("yellow", 0.0))
|
| 894 |
-
red
|
| 895 |
try:
|
| 896 |
conn = get_db_connection()
|
| 897 |
cur = conn.cursor()
|
| 898 |
# Try with llm_id; if identity error, retry without it
|
| 899 |
try:
|
| 900 |
-
cur.execute(
|
|
|
|
| 901 |
INSERT INTO [dbo].[{PROGRESS_TBL}]
|
| 902 |
([llm_id],[user_id],[role],[blue],[green],[yellow],[red],[created_at])
|
| 903 |
VALUES (?,?,?,?,?,?,?,SYSUTCDATETIME())
|
| 904 |
-
""",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 905 |
conn.commit()
|
| 906 |
return True
|
| 907 |
except pyodbc.Error as e:
|
| 908 |
if "IDENTITY_INSERT" in str(e) or "(544)" in str(e):
|
| 909 |
-
cur.execute(
|
|
|
|
| 910 |
INSERT INTO [dbo].[{PROGRESS_TBL}]
|
| 911 |
([user_id],[role],[blue],[green],[yellow],[red],[created_at])
|
| 912 |
VALUES (?,?,?,?,?,?,SYSUTCDATETIME())
|
| 913 |
-
""",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 914 |
conn.commit()
|
| 915 |
return True
|
| 916 |
else:
|
|
@@ -920,29 +1184,38 @@ def persist_final_progress(user_id: Optional[str], role: str, mix: Dict[str, flo
|
|
| 920 |
print("Persist final progress failed:", ex)
|
| 921 |
return False
|
| 922 |
finally:
|
| 923 |
-
try:
|
| 924 |
-
|
|
|
|
|
|
|
|
|
|
| 925 |
|
| 926 |
def choose_themes(sess, k: int) -> List[str]:
|
| 927 |
-
"""
|
| 928 |
-
Instead of generic topic banks, use FAISS to retrieve text chunks from the document.
|
| 929 |
-
"""
|
| 930 |
try:
|
| 931 |
from faiss_service import HAS_FAISS, FAISS_INDEX, TEXT_CHUNKS
|
| 932 |
-
|
| 933 |
if HAS_FAISS and FAISS_INDEX is not None and TEXT_CHUNKS:
|
| 934 |
# Just grab k random chunks from the indexed document
|
| 935 |
selected = random.sample(TEXT_CHUNKS, min(k, len(TEXT_CHUNKS)))
|
| 936 |
-
# Wrap them as "themes" but really they
|
| 937 |
return selected
|
| 938 |
except ImportError:
|
| 939 |
pass
|
| 940 |
-
|
| 941 |
# fallback: use generic themes
|
| 942 |
fallback_themes = [
|
| 943 |
-
"communication style",
|
| 944 |
-
"
|
| 945 |
-
"
|
| 946 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 947 |
]
|
| 948 |
-
return random.sample(fallback_themes, min(k, len(fallback_themes)))
|
|
|
|
|
|
|
| 1 |
import pyodbc
|
| 2 |
import os
|
| 3 |
import json
|
|
|
|
| 21 |
HAS_LLM_STACK = False
|
| 22 |
HAS_LLM = False
|
| 23 |
|
| 24 |
+
|
| 25 |
class Option(BaseModel):
|
| 26 |
text: str
|
| 27 |
color: str
|
| 28 |
|
| 29 |
+
|
| 30 |
class QAItem(BaseModel):
|
| 31 |
question: str
|
| 32 |
options: List[Option] = Field(min_items=4, max_items=4)
|
| 33 |
|
| 34 |
+
|
| 35 |
class BatchQA(BaseModel):
|
| 36 |
items: List[QAItem] = Field(..., min_items=1)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
SYSTEM_PROMPT = (
|
| 40 |
"You write marriage compatibility assessment questions that reveal four personality colors through forced choices:\n"
|
| 41 |
"- blue=analytical, fact-based (positive: thorough, precise | negative: overly critical, data-obsessed)\n"
|
|
|
|
| 69 |
|
| 70 |
USER_PROMPT_BATCH = (
|
| 71 |
"Context (from Surrounded by Idiots or other corpus):\n{context}\n\n"
|
| 72 |
+
"Question Type: {question_type}\n\n"
|
|
|
|
| 73 |
"User Profile (Current Background):\n"
|
| 74 |
"- Education: {education}\n"
|
| 75 |
"- Employment: {employment}\n"
|
|
|
|
| 92 |
"Themes (array of short strings): {themes_json}\n"
|
| 93 |
"Previously asked questions: {previous_questions}\n\n"
|
| 94 |
"{format_instructions}\n\n"
|
|
|
|
| 95 |
"Generate {question_type} questions:\n"
|
| 96 |
"- If QUESTION TYPE is 'profile': Generate 5 questions using ONLY profile data (education, employment, hobbies, family background, current lifestyle)\n"
|
| 97 |
"- If QUESTION TYPE is 'expectation': Generate 5 questions using ONLY expectation data (conflict style, financial preferences, values, deal breakers)\n"
|
| 98 |
"- If QUESTION TYPE is 'character': Generate 10 questions about CURRENT behavior in various life situations\n\n"
|
|
|
|
| 99 |
"CRITICAL RULES:\n"
|
| 100 |
"1) DO NOT use prefixes like 'Based on your profile' or 'Considering your expectations'\n"
|
| 101 |
"2) Questions should be natural and flow conversationally\n"
|
|
|
|
| 114 |
if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
|
| 115 |
try:
|
| 116 |
PARSER_BATCH = PydanticOutputParser(pydantic_object=BatchQA)
|
| 117 |
+
|
| 118 |
def build_batch_chain():
|
| 119 |
llm = ChatOpenAI(
|
| 120 |
model="gpt-4o-mini",
|
|
|
|
| 123 |
timeout=30,
|
| 124 |
model_kwargs={"response_format": {"type": "json_object"}},
|
| 125 |
)
|
| 126 |
+
prompt = ChatPromptTemplate.from_messages(
|
| 127 |
+
[
|
| 128 |
+
("system", SYSTEM_PROMPT),
|
| 129 |
+
("user", USER_PROMPT_BATCH),
|
| 130 |
+
]
|
| 131 |
+
)
|
| 132 |
return prompt | llm | PARSER_BATCH
|
| 133 |
|
| 134 |
CHAIN_BATCH = build_batch_chain()
|
|
|
|
| 136 |
print("Failed to build CHAIN_BATCH:", e)
|
| 137 |
CHAIN_BATCH = None
|
| 138 |
|
| 139 |
+
|
| 140 |
def ensure_valid_colors(options: List[Dict]) -> List[Dict]:
|
| 141 |
seen, fixed = set(), []
|
| 142 |
defaults = {
|
| 143 |
+
"blue": "Check facts and numbers",
|
| 144 |
+
"green": "Make a step-by-step plan",
|
| 145 |
+
"red": "Get people together and act",
|
| 146 |
+
"yellow": "Think of a new idea",
|
| 147 |
}
|
| 148 |
for o in options:
|
| 149 |
c = str(o.get("color", "")).lower()
|
| 150 |
t = str(o.get("text", "")).strip()
|
| 151 |
if c in COLOR_KEYS and c not in seen and t:
|
| 152 |
+
seen.add(c)
|
| 153 |
+
fixed.append({"text": t[:80], "color": c})
|
| 154 |
for c in COLOR_KEYS:
|
| 155 |
if c not in seen:
|
| 156 |
fixed.append({"text": defaults[c], "color": c})
|
| 157 |
return fixed[:4]
|
| 158 |
|
| 159 |
+
|
| 160 |
def summarize_profile(profile: Dict) -> Dict:
|
| 161 |
"""Extract all non-PII columns from Marriage table for LLM context"""
|
| 162 |
out: Dict = {}
|
| 163 |
+
|
| 164 |
# All columns from Marriage table (excluding PII where possible)
|
| 165 |
marriage_columns = [
|
| 166 |
+
"user_id",
|
| 167 |
+
"full_name",
|
| 168 |
+
"gender",
|
| 169 |
+
"current_city",
|
| 170 |
+
"marital_status",
|
| 171 |
+
"education_level",
|
| 172 |
+
"employment_status",
|
| 173 |
+
"number_of_siblings",
|
| 174 |
+
"family_type",
|
| 175 |
+
"hobbies_interests",
|
| 176 |
+
"conflict_approach",
|
| 177 |
+
"financial_style",
|
| 178 |
+
"income_range",
|
| 179 |
+
"relocation_willingness",
|
| 180 |
+
"height",
|
| 181 |
+
"skin_tone",
|
| 182 |
+
"languages_spoken",
|
| 183 |
+
"country",
|
| 184 |
+
"blood_group",
|
| 185 |
+
"religion",
|
| 186 |
+
"dual_citizenship",
|
| 187 |
+
"siblings_position",
|
| 188 |
+
"parents_living_status",
|
| 189 |
+
"live_with_parents",
|
| 190 |
+
"support_parents_financially",
|
| 191 |
+
"family_communication_frequency",
|
| 192 |
+
"food_preference",
|
| 193 |
+
"smoking_habit",
|
| 194 |
+
"alcohol_habit",
|
| 195 |
+
"daily_routine",
|
| 196 |
+
"fitness_level",
|
| 197 |
+
"own_pets",
|
| 198 |
+
"travel_preference",
|
| 199 |
+
"relaxation_mode",
|
| 200 |
+
"job_role",
|
| 201 |
+
"work_experience_years",
|
| 202 |
+
"career_aspirations",
|
| 203 |
+
"field_of_study",
|
| 204 |
+
"remark",
|
| 205 |
+
"children_timeline",
|
| 206 |
+
"open_to_adoption",
|
| 207 |
+
"deal_breakers",
|
| 208 |
+
"other_non_negotiables",
|
| 209 |
+
"health_constraints",
|
| 210 |
+
"live_with_inlaws",
|
| 211 |
]
|
| 212 |
+
|
| 213 |
for col in marriage_columns:
|
| 214 |
v = profile.get(col)
|
| 215 |
if v not in (None, "", []):
|
| 216 |
out[col] = v
|
| 217 |
+
|
| 218 |
return out
|
| 219 |
|
| 220 |
+
|
| 221 |
def offline_generate_batch(themes: List[str], state: Dict, context: str = "") -> List[Dict]:
|
| 222 |
prof = state.get("profile", {}) or {}
|
| 223 |
name = prof.get("full_name") or "Partner"
|
|
|
|
| 247 |
|
| 248 |
# incorporate small bit from context if available (first 120 chars)
|
| 249 |
if context:
|
| 250 |
+
ctx_snip = context.replace("\n", " ")[:120]
|
| 251 |
q = f"{q} (Note: {ctx_snip})"
|
| 252 |
|
| 253 |
# Keep concise
|
|
|
|
| 255 |
q = " ".join(q.split()[:20])
|
| 256 |
|
| 257 |
opts = [
|
| 258 |
+
{"text": "Check facts and numbers", "color": "blue"},
|
| 259 |
+
{"text": "Make a step-by-step plan", "color": "green"},
|
| 260 |
+
{"text": "Get people together and act", "color": "red"},
|
| 261 |
+
{"text": "Think of new ideas", "color": "yellow"},
|
| 262 |
]
|
| 263 |
random.shuffle(opts)
|
| 264 |
items.append({"question": q, "options": opts, "source": "fallback"})
|
| 265 |
return items
|
| 266 |
|
| 267 |
|
| 268 |
+
def generate_category_specific_options(
    question_type: str,
    question_text: str,
    profile_data: Dict = None,
    expectation_data: Dict = None,
) -> List[Dict]:
    """Generate four color-coded answer options tailored to the question.

    Args:
        question_type: One of "profile", "expectation" or "character";
            selects the base phrasing for each color.
        question_text: The question wording; scanned (case-insensitively)
            for theme keywords to pick a more contextual phrasing.
        profile_data: Unused here; kept for interface compatibility with
            existing callers.
        expectation_data: Unused here; kept for interface compatibility.

    Returns:
        A list of four ``{"text": str, "color": str}`` dicts, one per color
        in COLOR_KEYS order. Each text is capped at 80 characters; roughly
        30% of the time a parenthesised "shadow" weakness is appended when
        the combined text stays within a 15-word limit.
    """
    question_lower = question_text.lower()

    # Extract key themes from the question for better contextualization.
    themes_in_question = [
        theme
        for theme in (
            "learning",
            "problem",
            "conflict",
            "money",
            "family",
            "work",
            "social",
            "stress",
            "decision",
            "plan",
        )
        if theme in question_lower
    ]

    # Default option templates for each color (Simple English).
    base_options = {
        "blue": {
            "profile": "Study all the facts and think carefully",
            "expectation": "Get all the details before deciding",
            "character": "Look at the facts and think it through",
        },
        "green": {
            "profile": "Follow a clear, step-by-step way",
            "expectation": "Set clear rules and follow them",
            "character": "Make a plan and stick to it",
        },
        "red": {
            "profile": "Do something right away to fix it",
            "expectation": "Take charge and get things done fast",
            "character": "Act now and get people to help",
        },
        "yellow": {
            "profile": "Try new ways and think differently",
            "expectation": "Think of new ideas and what could be",
            "character": "Think of creative ideas and new ways",
        },
    }

    # Contextual variations based on question themes (Simple English).
    contextual_variations = {
        "learning": {
            "blue": "Study in a careful, organized way",
            "green": "Follow the lessons step by step",
            "red": "Start doing it right away to learn",
            "yellow": "Try different ways to learn",
        },
        "problem": {
            "blue": "Look at all the facts to find why",
            "green": "Fix each part one by one",
            "red": "Do something now to fix it",
            "yellow": "Find a new way around it",
        },
        "conflict": {
            "blue": "Think about each side fairly",
            "green": "Find a fair way to solve it",
            "red": "Face it directly and fix it",
            "yellow": "Find a new way to agree",
        },
        "money": {
            "blue": "Look at all the money details",
            "green": "Plan spending and track costs",
            "red": "Make quick money choices",
            "yellow": "Think of new ways to earn",
        },
        "family": {
            "blue": "Think about family matters clearly",
            "green": "Keep family ways and routines",
            "red": "Take the lead in family things",
            "yellow": "Try new family activities",
        },
    }

    # Shadow/negative aspects for realism (Simple English). Hoisted out of
    # the per-color loop below: the table is loop-invariant, so the original
    # code rebuilt these four nested dicts on every iteration for no gain.
    shadow_aspects = {
        "blue": {
            "profile": " (but can overthink things)",
            "expectation": " (but can think too much)",
            "character": " (but can be too picky)",
        },
        "green": {
            "profile": " (but can be too strict)",
            "expectation": " (but can make too many rules)",
            "character": " (but can hate change)",
        },
        "red": {
            "profile": " (but can act too fast)",
            "expectation": " (but can be too bossy)",
            "character": " (but can miss details)",
        },
        "yellow": {
            "profile": " (but can dream too much)",
            "expectation": " (but can forget to finish)",
            "character": " (but can be all over the place)",
        },
    }

    # Build one option per color, starting from the category base text.
    options = []
    for color in COLOR_KEYS:
        base_text = base_options[color][question_type]

        # Swap in a contextual variation when a known theme matched.
        for theme, variations in contextual_variations.items():
            if theme in themes_in_question:
                base_text = variations[color]
                break

        # Only add shadow aspects occasionally (30% chance) for variety,
        # and only when the result stays within the 15-word limit.
        if random.random() < 0.3:
            shadow = shadow_aspects[color][question_type]
            if len(base_text.split()) + len(shadow.split()) <= 15:
                base_text += shadow

        options.append(
            {
                "text": base_text[:80],  # Limit length
                "color": color,
            }
        )

    return options
|
| 402 |
|
| 403 |
|
| 404 |
+
def get_book_based_options(
    question_type: str, question_text: str
) -> List[Dict]:
    """Produce the four color-coded options used in book-based mode.

    Currently a thin wrapper around generate_category_specific_options so
    the symbol is always defined and call sites never hit a NameError.
    The body can later be replaced with logic that actually draws on the
    Surrounded-by-Idiots COLOR_EXAMPLES exported by faiss_service.
    """
    delegated = generate_category_specific_options(
        question_type, question_text, None, None
    )
    return delegated
|
| 416 |
|
| 417 |
|
| 418 |
+
def generate_batch_questions(
|
| 419 |
+
themes: List[str],
|
| 420 |
+
state: Dict,
|
| 421 |
+
context: str = "",
|
| 422 |
+
previous_questions: List[str] = None,
|
| 423 |
+
) -> List[Dict]:
|
| 424 |
# Extract ALL data from Marriage table
|
| 425 |
profile = state.get("profile", {})
|
| 426 |
user_id = profile.get("user_id")
|
| 427 |
+
|
| 428 |
try:
|
| 429 |
from database import fetch_expectation_data
|
| 430 |
+
|
| 431 |
expectation_data = fetch_expectation_data(user_id) if user_id else {}
|
| 432 |
except ImportError:
|
| 433 |
expectation_data = {}
|
| 434 |
+
|
| 435 |
# Extract ALL profile data from Marriage table
|
| 436 |
profile_data = {
|
| 437 |
# Personal Information
|
| 438 |
"full_name": profile.get("full_name", "Not specified"),
|
|
|
|
| 439 |
"gender": profile.get("gender", "Not specified"),
|
| 440 |
"current_city": profile.get("current_city", "Not specified"),
|
| 441 |
"country": profile.get("country", "Not specified"),
|
| 442 |
"marital_status": profile.get("marital_status", "Not specified"),
|
|
|
|
| 443 |
# Education & Career
|
| 444 |
"education_level": profile.get("education_level", "Not specified"),
|
| 445 |
"employment_status": profile.get("employment_status", "Not specified"),
|
|
|
|
| 448 |
"career_aspirations": profile.get("career_aspirations", "Not specified"),
|
| 449 |
"field_of_study": profile.get("field_of_study", "Not specified"),
|
| 450 |
"income_range": profile.get("income_range", "Not specified"),
|
|
|
|
| 451 |
# Family & Background
|
| 452 |
"number_of_siblings": profile.get("number_of_siblings", "Not specified"),
|
| 453 |
"family_type": profile.get("family_type", "Not specified"),
|
| 454 |
"siblings_position": profile.get("siblings_position", "Not specified"),
|
| 455 |
"parents_living_status": profile.get("parents_living_status", "Not specified"),
|
| 456 |
"live_with_parents": profile.get("live_with_parents", "Not specified"),
|
| 457 |
+
"support_parents_financially": profile.get(
|
| 458 |
+
"support_parents_financially", "Not specified"
|
| 459 |
+
),
|
| 460 |
+
"family_communication_frequency": profile.get(
|
| 461 |
+
"family_communication_frequency", "Not specified"
|
| 462 |
+
),
|
| 463 |
# Physical & Health
|
| 464 |
"height": profile.get("height", "Not specified"),
|
| 465 |
"skin_tone": profile.get("skin_tone", "Not specified"),
|
| 466 |
"blood_group": profile.get("blood_group", "Not specified"),
|
| 467 |
"health_constraints": profile.get("health_constraints", "Not specified"),
|
| 468 |
"fitness_level": profile.get("fitness_level", "Not specified"),
|
|
|
|
| 469 |
# Lifestyle & Habits
|
| 470 |
"hobbies_interests": str(profile.get("hobbies_interests", "Not specified")),
|
| 471 |
"conflict_approach": profile.get("conflict_approach", "Not specified"),
|
|
|
|
| 477 |
"own_pets": profile.get("own_pets", "Not specified"),
|
| 478 |
"travel_preference": profile.get("travel_preference", "Not specified"),
|
| 479 |
"relaxation_mode": profile.get("relaxation_mode", "Not specified"),
|
|
|
|
| 480 |
# Languages & Relocation
|
| 481 |
"languages_spoken": profile.get("languages_spoken", "Not specified"),
|
| 482 |
"relocation_willingness": profile.get("relocation_willingness", "Not specified"),
|
|
|
|
| 483 |
# Religion & Citizenship
|
| 484 |
"religion": profile.get("religion", "Not specified"),
|
| 485 |
"dual_citizenship": profile.get("dual_citizenship", "Not specified"),
|
|
|
|
| 486 |
# Relationship Preferences
|
| 487 |
"children_timeline": profile.get("children_timeline", "Not specified"),
|
| 488 |
"open_to_adoption": profile.get("open_to_adoption", "Not specified"),
|
| 489 |
"deal_breakers": profile.get("deal_breakers", "Not specified"),
|
| 490 |
"other_non_negotiables": profile.get("other_non_negotiables", "Not specified"),
|
| 491 |
"live_with_inlaws": profile.get("live_with_inlaws", "Not specified"),
|
|
|
|
| 492 |
# Additional Info
|
| 493 |
"remark": profile.get("remark", "Not specified"),
|
| 494 |
}
|
| 495 |
+
|
| 496 |
# Extract ALL expectation data from ExpectationResponse table
|
| 497 |
expectation_data_dict = {
|
| 498 |
# Basic Preferences
|
|
|
|
| 501 |
"pref_current_city": expectation_data.get("pref_current_city", "Not specified"),
|
| 502 |
"pref_countries": expectation_data.get("pref_countries", "Not specified"),
|
| 503 |
"pref_languages": expectation_data.get("pref_languages", "Not specified"),
|
| 504 |
+
"pref_education_level": expectation_data.get(
|
| 505 |
+
"pref_education_level", "Not specified"
|
| 506 |
+
),
|
| 507 |
+
"pref_employment_status": expectation_data.get(
|
| 508 |
+
"pref_employment_status", "Not specified"
|
| 509 |
+
),
|
| 510 |
# Health & Lifestyle
|
| 511 |
"health_constraints": expectation_data.get("health_constraints", "Not specified"),
|
| 512 |
"pref_diet": expectation_data.get("pref_diet", "Not specified"),
|
| 513 |
"accept_smoking": expectation_data.get("accept_smoking", "Not specified"),
|
| 514 |
"accept_alcohol": expectation_data.get("accept_alcohol", "Not specified"),
|
| 515 |
"pref_fitness": expectation_data.get("pref_fitness", "Not specified"),
|
|
|
|
| 516 |
# Family & Living
|
| 517 |
"pref_family_type": expectation_data.get("pref_family_type", "Not specified"),
|
| 518 |
"live_with_inlaws": expectation_data.get("live_with_inlaws", "Not specified"),
|
| 519 |
"children_timeline": expectation_data.get("children_timeline", "Not specified"),
|
| 520 |
"open_to_adoption": expectation_data.get("open_to_adoption", "Not specified"),
|
| 521 |
+
"pref_live_with_parents": expectation_data.get(
|
| 522 |
+
"pref_live_with_parents", "Not specified"
|
| 523 |
+
),
|
| 524 |
+
"financial_support_to_parents": expectation_data.get(
|
| 525 |
+
"financial_support_to_parents", "Not specified"
|
| 526 |
+
),
|
| 527 |
# Conflict & Finance
|
| 528 |
+
"pref_conflict_approach": expectation_data.get(
|
| 529 |
+
"pref_conflict_approach", "Not specified"
|
| 530 |
+
),
|
| 531 |
+
"pref_financial_style": expectation_data.get(
|
| 532 |
+
"pref_financial_style", "Not specified"
|
| 533 |
+
),
|
| 534 |
"pref_income_range": expectation_data.get("pref_income_range", "Not specified"),
|
|
|
|
| 535 |
# Values & Compatibility
|
| 536 |
+
"religion_alignment": expectation_data.get(
|
| 537 |
+
"religion_alignment", "Not specified"
|
| 538 |
+
),
|
| 539 |
+
"pref_shared_hobbies": expectation_data.get(
|
| 540 |
+
"pref_shared_hobbies", "Not specified"
|
| 541 |
+
),
|
| 542 |
"travel_pref": expectation_data.get("travel_pref", "Not specified"),
|
| 543 |
"pet_pref": expectation_data.get("pet_pref", "Not specified"),
|
|
|
|
| 544 |
# Career & Relocation
|
| 545 |
+
"pref_partner_relocation": expectation_data.get(
|
| 546 |
+
"pref_partner_relocation", "Not specified"
|
| 547 |
+
),
|
| 548 |
+
"pref_career_aspirations": expectation_data.get(
|
| 549 |
+
"pref_career_aspirations", "Not specified"
|
| 550 |
+
),
|
| 551 |
# Additional Preferences
|
| 552 |
"marital_status": expectation_data.get("marital_status", "Not specified"),
|
| 553 |
"skin_tone": expectation_data.get("skin_tone", "Not specified"),
|
| 554 |
"daily_routine": expectation_data.get("daily_routine", "Not specified"),
|
| 555 |
+
"family_communication_frequency": expectation_data.get(
|
| 556 |
+
"family_communication_frequency", "Not specified"
|
| 557 |
+
),
|
| 558 |
"relaxation_mode": expectation_data.get("relaxation_mode", "Not specified"),
|
|
|
|
| 559 |
# Non-negotiables
|
| 560 |
"deal_breakers": expectation_data.get("deal_breakers", "Not specified"),
|
| 561 |
+
"other_non_negotiables": expectation_data.get(
|
| 562 |
+
"other_non_negotiables", "Not specified"
|
| 563 |
+
),
|
| 564 |
# Summary
|
| 565 |
"expectation_summary": expectation_data.get("expectation_summary", "Not specified"),
|
| 566 |
"_mandatory_fields": expectation_data.get("_mandatory_fields", "Not specified"),
|
| 567 |
}
|
| 568 |
+
|
| 569 |
+
# Check if we have book-based options
|
| 570 |
+
try:
|
| 571 |
+
from faiss_service import COLOR_EXAMPLES
|
| 572 |
+
|
| 573 |
+
use_book_options = COLOR_EXAMPLES is not None
|
| 574 |
+
if use_book_options:
|
| 575 |
+
print("Using book-based options from 'Surrounded by Idiots'")
|
| 576 |
+
except ImportError:
|
| 577 |
+
use_book_options = False
|
| 578 |
+
|
| 579 |
if CHAIN_BATCH is not None and PARSER_BATCH is not None:
|
| 580 |
try:
|
| 581 |
items: List[Dict] = []
|
| 582 |
+
|
| 583 |
# 1. PROFILE-BASED QUESTIONS (5 questions) - Using ALL Marriage table columns
|
| 584 |
profile_prompt = {
|
| 585 |
"state": json.dumps(state, ensure_ascii=False),
|
| 586 |
"themes_json": json.dumps(["profile"] * 5, ensure_ascii=False),
|
| 587 |
+
"previous_questions": json.dumps(
|
| 588 |
+
previous_questions or [], ensure_ascii=False
|
| 589 |
+
),
|
| 590 |
"format_instructions": PARSER_BATCH.get_format_instructions(),
|
| 591 |
"context": "Generate 5 PROFILE-BASED questions using ALL available user background information.",
|
| 592 |
"question_type": "profile",
|
|
|
|
| 593 |
# Use ALL profile data
|
| 594 |
"education": f"{profile_data['education_level']} | {profile_data['field_of_study']}",
|
| 595 |
"employment": f"{profile_data['employment_status']} | {profile_data['job_role']} ({profile_data['work_experience_years']} years)",
|
| 596 |
+
"hobbies": profile_data["hobbies_interests"],
|
| 597 |
"family_type": f"{profile_data['family_type']} | Siblings: {profile_data['number_of_siblings']} | Position: {profile_data['siblings_position']}",
|
| 598 |
+
"current_lifestyle": (
|
| 599 |
+
"City: "
|
| 600 |
+
f"{profile_data['current_city']}, {profile_data['country']} | "
|
| 601 |
+
f"Height: {profile_data['height']} | Languages: {profile_data['languages_spoken']} | "
|
| 602 |
+
f"Religion: {profile_data['religion']} | Fitness: {profile_data['fitness_level']} | "
|
| 603 |
+
f"Diet: {profile_data['food_preference']} | Habits: Smoking: {profile_data['smoking_habit']}, "
|
| 604 |
+
f"Alcohol: {profile_data['alcohol_habit']}"
|
| 605 |
+
),
|
| 606 |
# Expectation data marked as irrelevant
|
| 607 |
"conflict_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
| 608 |
"financial_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
|
|
|
| 615 |
"ambition_pref": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
| 616 |
"deal_breakers": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
|
| 617 |
}
|
| 618 |
+
|
| 619 |
result = CHAIN_BATCH.invoke(profile_prompt)
|
| 620 |
profile_items = get_items_from_result(result)
|
| 621 |
+
|
| 622 |
for qa in profile_items[:5]:
|
| 623 |
out = qa.dict() if hasattr(qa, "dict") else dict(qa)
|
| 624 |
+
|
| 625 |
+
# Get options from book if available
|
| 626 |
+
if use_book_options:
|
| 627 |
+
options = get_book_based_options("profile", out.get("question", ""))
|
| 628 |
+
if options and len(options) == 4:
|
| 629 |
+
out["options"] = options
|
| 630 |
+
else:
|
| 631 |
+
# Fallback to generated options
|
| 632 |
+
out["options"] = generate_category_specific_options(
|
| 633 |
+
"profile", out.get("question", ""), profile_data, None
|
| 634 |
+
)
|
| 635 |
+
else:
|
| 636 |
+
out["options"] = generate_category_specific_options(
|
| 637 |
+
"profile", out.get("question", ""), profile_data, None
|
| 638 |
+
)
|
| 639 |
+
|
| 640 |
+
out["source"] = (
|
| 641 |
+
"llm_profile_book" if use_book_options else "llm_profile"
|
| 642 |
)
|
|
|
|
| 643 |
out["question_type"] = "profile"
|
| 644 |
random.shuffle(out["options"])
|
| 645 |
items.append(out)
|
| 646 |
+
|
| 647 |
# 2. EXPECTATION-BASED QUESTIONS (5 questions) - Using ALL ExpectationResponse columns
|
| 648 |
expectation_prompt = {
|
| 649 |
"state": json.dumps(state, ensure_ascii=False),
|
| 650 |
"themes_json": json.dumps(["expectation"] * 5, ensure_ascii=False),
|
| 651 |
+
"previous_questions": json.dumps(
|
| 652 |
+
[q["question"] for q in items] + (previous_questions or []),
|
| 653 |
+
ensure_ascii=False,
|
| 654 |
+
),
|
| 655 |
"format_instructions": PARSER_BATCH.get_format_instructions(),
|
| 656 |
"context": "Generate 5 EXPECTATION-BASED questions using ALL relationship preferences and expectations.",
|
| 657 |
"question_type": "expectation",
|
|
|
|
| 658 |
# Minimal profile context
|
| 659 |
"education": "Background context only",
|
| 660 |
"employment": "Background context only",
|
| 661 |
"hobbies": "Background context only",
|
| 662 |
"family_type": "Background context only",
|
| 663 |
"current_lifestyle": "General context",
|
|
|
|
| 664 |
# Use ALL expectation data
|
| 665 |
"conflict_style": f"{expectation_data_dict['pref_conflict_approach']}",
|
| 666 |
+
"financial_style": (
|
| 667 |
+
f"{expectation_data_dict['pref_financial_style']} | "
|
| 668 |
+
f"Income: {expectation_data_dict['pref_income_range']}"
|
| 669 |
+
),
|
| 670 |
+
"income_range": expectation_data_dict["pref_income_range"],
|
| 671 |
+
"relocation_willingness": (
|
| 672 |
+
f"{expectation_data_dict['pref_partner_relocation']}"
|
| 673 |
+
),
|
| 674 |
+
"family_values": (
|
| 675 |
+
f"{expectation_data_dict['pref_family_type']} | "
|
| 676 |
+
f"Live with in-laws: {expectation_data_dict['live_with_inlaws']} | "
|
| 677 |
+
f"Children timeline: {expectation_data_dict['children_timeline']}"
|
| 678 |
+
),
|
| 679 |
+
"core_values": (
|
| 680 |
+
f"Religion: {expectation_data_dict['religion_alignment']} | "
|
| 681 |
+
f"Deal breakers: {expectation_data_dict['deal_breakers']}"
|
| 682 |
+
),
|
| 683 |
+
"lifestyle_pref": (
|
| 684 |
+
f"Fitness: {expectation_data_dict['pref_fitness']} | "
|
| 685 |
+
f"Diet: {expectation_data_dict['pref_diet']} | "
|
| 686 |
+
f"Daily routine: {expectation_data_dict['daily_routine']}"
|
| 687 |
+
),
|
| 688 |
+
"social_pref": (
|
| 689 |
+
f"Hobbies: {expectation_data_dict['pref_shared_hobbies']} | "
|
| 690 |
+
f"Travel: {expectation_data_dict['travel_pref']} | "
|
| 691 |
+
f"Pets: {expectation_data_dict['pet_pref']}"
|
| 692 |
+
),
|
| 693 |
+
"ambition_pref": (
|
| 694 |
+
f"Career: {expectation_data_dict['pref_career_aspirations']} | "
|
| 695 |
+
f"Education: {expectation_data_dict['pref_education_level']}"
|
| 696 |
+
),
|
| 697 |
+
"deal_breakers": (
|
| 698 |
+
f"{expectation_data_dict['deal_breakers']} | "
|
| 699 |
+
f"Other non-negotiables: {expectation_data_dict['other_non_negotiables']}"
|
| 700 |
+
),
|
| 701 |
}
|
| 702 |
+
|
| 703 |
result = CHAIN_BATCH.invoke(expectation_prompt)
|
| 704 |
expectation_items = get_items_from_result(result)
|
| 705 |
+
|
| 706 |
for qa in expectation_items[:5]:
|
| 707 |
out = qa.dict() if hasattr(qa, "dict") else dict(qa)
|
| 708 |
+
|
| 709 |
+
# Get options from book if available
|
| 710 |
+
if use_book_options:
|
| 711 |
+
options = get_book_based_options("expectation", out.get("question", ""))
|
| 712 |
+
if options and len(options) == 4:
|
| 713 |
+
out["options"] = options
|
| 714 |
+
else:
|
| 715 |
+
out["options"] = generate_category_specific_options(
|
| 716 |
+
"expectation",
|
| 717 |
+
out.get("question", ""),
|
| 718 |
+
None,
|
| 719 |
+
expectation_data_dict,
|
| 720 |
+
)
|
| 721 |
+
else:
|
| 722 |
+
out["options"] = generate_category_specific_options(
|
| 723 |
+
"expectation",
|
| 724 |
+
out.get("question", ""),
|
| 725 |
+
None,
|
| 726 |
+
expectation_data_dict,
|
| 727 |
+
)
|
| 728 |
+
|
| 729 |
+
out["source"] = (
|
| 730 |
+
"llm_expectation_book" if use_book_options else "llm_expectation"
|
| 731 |
)
|
|
|
|
| 732 |
out["question_type"] = "expectation"
|
| 733 |
random.shuffle(out["options"])
|
| 734 |
items.append(out)
|
| 735 |
+
|
| 736 |
# 3. CHARACTER-BASED QUESTIONS (10 questions) - Using data from BOTH tables
|
| 737 |
character_prompt = {
|
| 738 |
"state": json.dumps(state, ensure_ascii=False),
|
| 739 |
"themes_json": json.dumps(themes[:10], ensure_ascii=False),
|
| 740 |
+
"previous_questions": json.dumps(
|
| 741 |
+
[q["question"] for q in items] + (previous_questions or []),
|
| 742 |
+
ensure_ascii=False,
|
| 743 |
+
),
|
| 744 |
"format_instructions": PARSER_BATCH.get_format_instructions(),
|
| 745 |
+
"context": context[:2000]
|
| 746 |
+
+ "\n\nGenerate 10 CHARACTER-BASED questions using ALL available data.",
|
| 747 |
"question_type": "character",
|
|
|
|
| 748 |
# All data from Marriage table
|
| 749 |
+
"education": profile_data["education_level"],
|
| 750 |
+
"employment": profile_data["employment_status"],
|
| 751 |
+
"hobbies": profile_data["hobbies_interests"],
|
| 752 |
+
"family_type": profile_data["family_type"],
|
| 753 |
+
"current_lifestyle": (
|
| 754 |
+
f"{profile_data['current_city']}, {profile_data['country']} | "
|
| 755 |
+
f"{profile_data['daily_routine']} | Relaxation: {profile_data['relaxation_mode']}"
|
| 756 |
+
),
|
| 757 |
# All data from ExpectationResponse table
|
| 758 |
+
"conflict_style": expectation_data_dict["pref_conflict_approach"],
|
| 759 |
+
"financial_style": expectation_data_dict["pref_financial_style"],
|
| 760 |
+
"income_range": expectation_data_dict["pref_income_range"],
|
| 761 |
+
"relocation_willingness": expectation_data_dict[
|
| 762 |
+
"pref_partner_relocation"
|
| 763 |
+
],
|
| 764 |
+
"family_values": expectation_data_dict["pref_family_type"],
|
| 765 |
+
"core_values": expectation_data_dict["religion_alignment"],
|
| 766 |
+
"lifestyle_pref": expectation_data_dict["pref_fitness"],
|
| 767 |
+
"social_pref": expectation_data_dict["pref_shared_hobbies"],
|
| 768 |
+
"ambition_pref": expectation_data_dict["pref_career_aspirations"],
|
| 769 |
+
"deal_breakers": expectation_data_dict["deal_breakers"],
|
| 770 |
}
|
| 771 |
+
|
| 772 |
result = CHAIN_BATCH.invoke(character_prompt)
|
| 773 |
character_items = get_items_from_result(result)
|
| 774 |
+
|
| 775 |
for qa in character_items[:10]:
|
| 776 |
out = qa.dict() if hasattr(qa, "dict") else dict(qa)
|
| 777 |
+
|
| 778 |
+
# Get options from book if available
|
| 779 |
+
if use_book_options:
|
| 780 |
+
options = get_book_based_options("character", out.get("question", ""))
|
| 781 |
+
if options and len(options) == 4:
|
| 782 |
+
out["options"] = options
|
| 783 |
+
else:
|
| 784 |
+
out["options"] = generate_category_specific_options(
|
| 785 |
+
"character",
|
| 786 |
+
out.get("question", ""),
|
| 787 |
+
profile_data,
|
| 788 |
+
expectation_data_dict,
|
| 789 |
+
)
|
| 790 |
+
else:
|
| 791 |
+
out["options"] = generate_category_specific_options(
|
| 792 |
+
"character",
|
| 793 |
+
out.get("question", ""),
|
| 794 |
+
profile_data,
|
| 795 |
+
expectation_data_dict,
|
| 796 |
+
)
|
| 797 |
+
|
| 798 |
+
out["source"] = (
|
| 799 |
+
"llm_character_book" if use_book_options else "llm_character"
|
| 800 |
)
|
|
|
|
| 801 |
out["question_type"] = "character"
|
| 802 |
random.shuffle(out["options"])
|
| 803 |
items.append(out)
|
| 804 |
+
|
| 805 |
# Verify we have exactly 20 questions
|
| 806 |
if len(items) == 20:
|
| 807 |
return items[:20]
|
| 808 |
else:
|
| 809 |
+
# If LLM did not generate enough, fill with fallback
|
| 810 |
+
return fill_missing_questions(
|
| 811 |
+
items, themes, state, profile_data, expectation_data_dict, context
|
| 812 |
+
)
|
| 813 |
+
|
| 814 |
except Exception as e:
|
| 815 |
print("LLM batch generation failed:", e)
|
| 816 |
+
return generate_fallback_with_distribution(
|
| 817 |
+
themes, state, profile_data, expectation_data_dict, context
|
| 818 |
+
)
|
| 819 |
else:
|
| 820 |
+
return generate_fallback_with_distribution(
|
| 821 |
+
themes, state, profile_data, expectation_data_dict, context
|
| 822 |
+
)
|
| 823 |
|
| 824 |
|
| 825 |
def get_items_from_result(result):
|
|
|
|
| 831 |
else:
|
| 832 |
return []
|
| 833 |
|
| 834 |
+
|
| 835 |
+
def fill_missing_questions(
    current_items: List[Dict],
    themes: List[str],
    state: Dict,
    profile_data: Dict,
    expectation_data: Dict,
    context: str = "",
) -> List[Dict]:
    """Top up a partial question set to the 5/5/10 split (20 questions total)."""
    items = list(current_items)

    # Tally how many questions of each category already exist.
    tally = {"profile": 0, "expectation": 0, "character": 0}
    for entry in items:
        kind = entry.get("question_type")
        if kind in tally:
            tally[kind] += 1

    # Append fallback profile questions until there are 5.
    for _ in range(max(0, 5 - tally["profile"])):
        items.append(generate_profile_question(state, profile_data))

    # Append fallback expectation questions until there are 5.
    for _ in range(max(0, 5 - tally["expectation"])):
        items.append(generate_expectation_question(state, expectation_data))

    # Append fallback character questions until there are 10, cycling themes.
    char_count = tally["character"]
    while char_count < 10:
        theme = themes[char_count % len(themes)] if themes else "daily situation"
        items.append(generate_character_question(theme, state))
        char_count += 1

    return items[:20]
|
| 871 |
|
| 872 |
+
|
| 873 |
def generate_profile_question(state: Dict, profile_data: Dict) -> Dict:
    """Generate a single fallback profile-based question.

    The topic is chosen by counting how many profile questions already
    appear in ``state["history"]``; once every topic has been used the
    index resets to the first one.

    Args:
        state: Session state dict; only ``state["history"]`` is read here.
        profile_data: Passed through to the option generator for context.

    Returns:
        A question dict with "question", shuffled "options", "source" and
        "question_type" keys.
    """
    # NOTE: the original body also read state["profile"]["full_name"] into
    # unused locals; that dead code is removed here — behavior is unchanged.
    profile_topics = [
        (
            "education",
            "How does your education background shape how you approach complex information?",
        ),
        (
            "employment",
            "What methods from your work life do you use for personal challenges?",
        ),
        (
            "hobbies",
            "How do your hobbies change the way you try new experiences?",
        ),
        (
            "family",
            "What communication style from your family feels most natural to you?",
        ),
        (
            "background",
            "How does your personal history affect your current decision-making style?",
        ),
    ]

    # Pick the next topic based on how many profile questions were asked.
    topic_idx = len(
        [q for q in state.get("history", []) if q.get("question_type") == "profile"]
    )
    if topic_idx >= len(profile_topics):
        topic_idx = 0

    _, question = profile_topics[topic_idx]

    # Generate contextual options and randomise their display order.
    options = generate_category_specific_options("profile", question, profile_data, None)
    random.shuffle(options)

    return {
        "question": question,
        "options": options,
        "source": "fallback_profile",
        "question_type": "profile",
    }
|
| 919 |
|
| 920 |
+
|
| 921 |
def generate_expectation_question(state: Dict, expectation_data: Dict) -> Dict:
    """Build a single fallback expectation-based question.

    The topic rotates through a fixed list, chosen by the number of
    expectation questions already present in ``state["history"]`` and
    resetting to the first topic once the list is exhausted.
    """
    topics = [
        (
            "conflict",
            "When tensions arise, what is your most natural way to handle them?",
        ),
        ("values", "How do your core principles guide your everyday choices?"),
        ("finance", "What mindset guides your shared financial decisions?"),
        ("balance", "How do you balance your needs with your partner's needs?"),
        ("dealbreakers", "What personal boundaries feel completely non-negotiable to you?"),
    ]

    # Count expectation questions already asked; wrap to the first topic.
    asked = len(
        [q for q in state.get("history", []) if q.get("question_type") == "expectation"]
    )
    idx = asked if asked < len(topics) else 0

    question = topics[idx][1]

    # Build contextual options and shuffle their presentation order.
    opts = generate_category_specific_options(
        "expectation", question, None, expectation_data
    )
    random.shuffle(opts)

    return {
        "question": question,
        "options": opts,
        "source": "fallback_expectation",
        "question_type": "expectation",
    }
|
| 954 |
|
| 955 |
|
|
|
|
| 957 |
"""Generate a single character question"""
|
| 958 |
prof = state.get("profile", {})
|
| 959 |
name = prof.get("full_name") or "Partner"
|
| 960 |
+
|
| 961 |
short_theme = theme.split(" around ")[-1].strip()[:50]
|
| 962 |
+
question = f"When {short_theme}, what is your typical response?"
|
| 963 |
+
|
| 964 |
# Generate contextual options
|
| 965 |
options = generate_category_specific_options("character", question, None, None)
|
| 966 |
random.shuffle(options)
|
| 967 |
+
|
| 968 |
return {
|
| 969 |
"question": question,
|
| 970 |
"options": options,
|
| 971 |
"source": "fallback_character",
|
| 972 |
+
"question_type": "character",
|
| 973 |
}
|
| 974 |
|
| 975 |
|
| 976 |
+
def generate_fallback_with_distribution(
|
| 977 |
+
themes: List[str],
|
| 978 |
+
state: Dict,
|
| 979 |
+
profile_data: Dict,
|
| 980 |
+
expectation_data: Dict,
|
| 981 |
+
context: str = "",
|
| 982 |
+
) -> List[Dict]:
|
| 983 |
"""
|
| 984 |
+
Fallback generator that enforces the 5-5-10 distribution.
|
| 985 |
+
|
| 986 |
+
This version does NOT access missing keys like 'education' directly.
|
| 987 |
+
It uses generic wording so it never raises KeyError even if
|
| 988 |
+
profile_data or expectation_data is empty or partial.
|
| 989 |
"""
|
| 990 |
+
items: List[Dict] = []
|
| 991 |
+
|
| 992 |
+
# 1. Generate 5 PROFILE-BASED questions (generic but aligned with concept)
|
| 993 |
+
profile_questions = [
|
| 994 |
+
"How does your education background influence the way you learn new things?",
|
| 995 |
+
"How does your main work or job affect how you solve problems?",
|
| 996 |
+
"How do your hobbies or interests change the way you try new things?",
|
| 997 |
+
"How does your family background shape your way of talking and listening?",
|
| 998 |
+
"How does your living place and daily life affect your choices?",
|
|
|
|
|
|
|
| 999 |
]
|
| 1000 |
+
|
| 1001 |
+
for q in profile_questions:
|
| 1002 |
opts = [
|
| 1003 |
+
{"text": "Look at facts and think before deciding", "color": "blue"},
|
| 1004 |
+
{"text": "Make a clear plan and follow it", "color": "green"},
|
| 1005 |
+
{"text": "Act now and involve people", "color": "red"},
|
| 1006 |
+
{"text": "Think of new and different ways", "color": "yellow"},
|
| 1007 |
]
|
| 1008 |
random.shuffle(opts)
|
| 1009 |
+
items.append(
|
| 1010 |
+
{
|
| 1011 |
+
"question": q,
|
| 1012 |
+
"options": opts,
|
| 1013 |
+
"source": "fallback_profile",
|
| 1014 |
+
"question_type": "profile",
|
| 1015 |
+
}
|
| 1016 |
+
)
|
| 1017 |
+
|
| 1018 |
+
# 2. Generate 5 EXPECTATION-BASED questions (generic expectation focus)
|
| 1019 |
+
expectation_questions = [
|
| 1020 |
+
"When there is conflict in a relationship, what do you usually do first?",
|
| 1021 |
+
"When you and your partner handle money, what feels most natural to you?",
|
| 1022 |
+
"When your values and your partner's values differ, how do you respond?",
|
| 1023 |
+
"How do you balance work, rest, and time with your partner?",
|
| 1024 |
+
"When a personal boundary is close to being crossed, what do you usually do?",
|
| 1025 |
]
|
| 1026 |
+
|
| 1027 |
+
for q in expectation_questions:
|
| 1028 |
opts = [
|
| 1029 |
+
{"text": "Check all facts before reacting", "color": "blue"},
|
| 1030 |
+
{"text": "Use a calm, step-by-step way", "color": "green"},
|
| 1031 |
+
{"text": "Decide fast and take action", "color": "red"},
|
| 1032 |
+
{"text": "Look for a new creative solution", "color": "yellow"},
|
| 1033 |
]
|
| 1034 |
random.shuffle(opts)
|
| 1035 |
+
items.append(
|
| 1036 |
+
{
|
| 1037 |
+
"question": q,
|
| 1038 |
+
"options": opts,
|
| 1039 |
+
"source": "fallback_expectation",
|
| 1040 |
+
"question_type": "expectation",
|
| 1041 |
+
}
|
| 1042 |
+
)
|
| 1043 |
+
|
| 1044 |
# 3. Generate 10 CHARACTER-BASED questions from themes
|
| 1045 |
+
for i, theme in enumerate(themes[:10] or ["daily situation"] * 10):
|
| 1046 |
+
short = theme.split(" around ")[-1].strip() or "a daily situation"
|
| 1047 |
question = f"When dealing with {short}, what is your typical approach?"
|
| 1048 |
+
|
| 1049 |
opts = [
|
| 1050 |
+
{"text": "Look at facts and details first", "color": "blue"},
|
| 1051 |
+
{"text": "Make a step-by-step plan", "color": "green"},
|
| 1052 |
+
{"text": "Take charge and move things forward", "color": "red"},
|
| 1053 |
+
{"text": "Think of new and different ideas", "color": "yellow"},
|
| 1054 |
]
|
| 1055 |
random.shuffle(opts)
|
| 1056 |
+
items.append(
|
| 1057 |
+
{
|
| 1058 |
+
"question": question,
|
| 1059 |
+
"options": opts,
|
| 1060 |
+
"source": "fallback_character",
|
| 1061 |
+
"question_type": "character",
|
| 1062 |
+
}
|
| 1063 |
+
)
|
| 1064 |
+
|
| 1065 |
# Ensure we have exactly 20 questions
|
| 1066 |
return items[:20]
|
| 1067 |
|
| 1068 |
|
| 1069 |
class SessionState:
|
| 1070 |
+
def __init__(
|
| 1071 |
+
self,
|
| 1072 |
+
n_questions: int,
|
| 1073 |
+
batch_size: int,
|
| 1074 |
+
domain: str = "general",
|
| 1075 |
+
role: Optional[str] = None,
|
| 1076 |
+
profile: Optional[Dict] = None,
|
| 1077 |
+
):
|
| 1078 |
domain = (domain or role or "general").lower()
|
| 1079 |
self.domain = domain if domain in DOMAINS else "general"
|
| 1080 |
+
self.role = role or self.domain
|
| 1081 |
self.profile = profile or {}
|
| 1082 |
self.n_questions = max(1, min(n_questions, MAX_QUESTIONS))
|
| 1083 |
self.batch_size = max(1, batch_size)
|
|
|
|
| 1087 |
self.queue: List[Dict] = []
|
| 1088 |
self.finished = False
|
| 1089 |
self.used_topics: List[str] = []
|
| 1090 |
+
# Track question texts, used by LLM prompt
|
| 1091 |
+
self.history_of_questions: List[str] = []
|
| 1092 |
|
| 1093 |
def to_min_state(self) -> Dict:
|
| 1094 |
total = sum(self.color_counts.values()) or 1
|
| 1095 |
+
mix_percentages = {
|
| 1096 |
+
k: round((v / total) * 100, 2) for k, v in self.color_counts.items()
|
| 1097 |
+
}
|
| 1098 |
dominant = max(self.color_counts, key=self.color_counts.get) if total else None
|
| 1099 |
return {
|
| 1100 |
"asked": self.asked,
|
|
|
|
| 1108 |
def remaining(self) -> int:
|
| 1109 |
return self.n_questions - self.asked
|
| 1110 |
|
| 1111 |
+
|
| 1112 |
SESSIONS_FILE = os.getenv("PYMATCH_SESSIONS_FILE", "sessions.json")
|
| 1113 |
_sessions_lock = threading.Lock()
|
| 1114 |
SESSIONS: Dict[str, SessionState] = {}
|
| 1115 |
|
| 1116 |
+
|
| 1117 |
def save_sessions():
|
| 1118 |
try:
|
| 1119 |
with _sessions_lock:
|
|
|
|
| 1125 |
except Exception as e:
|
| 1126 |
print("Failed to save sessions:", e)
|
| 1127 |
|
| 1128 |
+
|
| 1129 |
def persist_final_progress(user_id: Optional[str], role: str, mix: Dict[str, float]) -> bool:
|
| 1130 |
from database import get_db_connection
|
| 1131 |
from config import PROGRESS_TBL
|
| 1132 |
+
|
| 1133 |
llm_id = str(uuid.uuid4())
|
| 1134 |
+
blue = float(mix.get("blue", 0.0))
|
| 1135 |
+
green = float(mix.get("green", 0.0))
|
| 1136 |
yellow = float(mix.get("yellow", 0.0))
|
| 1137 |
+
red = float(mix.get("red", 0.0))
|
| 1138 |
try:
|
| 1139 |
conn = get_db_connection()
|
| 1140 |
cur = conn.cursor()
|
| 1141 |
# Try with llm_id; if identity error, retry without it
|
| 1142 |
try:
|
| 1143 |
+
cur.execute(
|
| 1144 |
+
f"""
|
| 1145 |
INSERT INTO [dbo].[{PROGRESS_TBL}]
|
| 1146 |
([llm_id],[user_id],[role],[blue],[green],[yellow],[red],[created_at])
|
| 1147 |
VALUES (?,?,?,?,?,?,?,SYSUTCDATETIME())
|
| 1148 |
+
""",
|
| 1149 |
+
(
|
| 1150 |
+
llm_id,
|
| 1151 |
+
str(user_id) if user_id is not None else None,
|
| 1152 |
+
role,
|
| 1153 |
+
blue,
|
| 1154 |
+
green,
|
| 1155 |
+
yellow,
|
| 1156 |
+
red,
|
| 1157 |
+
),
|
| 1158 |
+
)
|
| 1159 |
conn.commit()
|
| 1160 |
return True
|
| 1161 |
except pyodbc.Error as e:
|
| 1162 |
if "IDENTITY_INSERT" in str(e) or "(544)" in str(e):
|
| 1163 |
+
cur.execute(
|
| 1164 |
+
f"""
|
| 1165 |
INSERT INTO [dbo].[{PROGRESS_TBL}]
|
| 1166 |
([user_id],[role],[blue],[green],[yellow],[red],[created_at])
|
| 1167 |
VALUES (?,?,?,?,?,?,SYSUTCDATETIME())
|
| 1168 |
+
""",
|
| 1169 |
+
(
|
| 1170 |
+
str(user_id) if user_id is not None else None,
|
| 1171 |
+
role,
|
| 1172 |
+
blue,
|
| 1173 |
+
green,
|
| 1174 |
+
yellow,
|
| 1175 |
+
red,
|
| 1176 |
+
),
|
| 1177 |
+
)
|
| 1178 |
conn.commit()
|
| 1179 |
return True
|
| 1180 |
else:
|
|
|
|
| 1184 |
print("Persist final progress failed:", ex)
|
| 1185 |
return False
|
| 1186 |
finally:
|
| 1187 |
+
try:
|
| 1188 |
+
conn.close()
|
| 1189 |
+
except Exception:
|
| 1190 |
+
pass
|
| 1191 |
+
|
| 1192 |
|
| 1193 |
def choose_themes(sess, k: int) -> List[str]:
|
| 1194 |
+
"""Choose k themes, preferably from FAISS TEXT_CHUNKS, else generic list."""
|
|
|
|
|
|
|
| 1195 |
try:
|
| 1196 |
from faiss_service import HAS_FAISS, FAISS_INDEX, TEXT_CHUNKS
|
| 1197 |
+
|
| 1198 |
if HAS_FAISS and FAISS_INDEX is not None and TEXT_CHUNKS:
|
| 1199 |
# Just grab k random chunks from the indexed document
|
| 1200 |
selected = random.sample(TEXT_CHUNKS, min(k, len(TEXT_CHUNKS)))
|
| 1201 |
+
# Wrap them as "themes" but really they are just context text
|
| 1202 |
return selected
|
| 1203 |
except ImportError:
|
| 1204 |
pass
|
| 1205 |
+
|
| 1206 |
# fallback: use generic themes
|
| 1207 |
fallback_themes = [
|
| 1208 |
+
"communication style",
|
| 1209 |
+
"conflict resolution",
|
| 1210 |
+
"decision making",
|
| 1211 |
+
"problem solving",
|
| 1212 |
+
"team collaboration",
|
| 1213 |
+
"personal values",
|
| 1214 |
+
"work habits",
|
| 1215 |
+
"social interaction",
|
| 1216 |
+
"stress management",
|
| 1217 |
+
"goal setting",
|
| 1218 |
+
"time management",
|
| 1219 |
+
"relationship dynamics",
|
| 1220 |
]
|
| 1221 |
+
return random.sample(fallback_themes, min(k, len(fallback_themes)))
|
routes/auth_routes.py
CHANGED
|
@@ -60,7 +60,7 @@ def get_db_connection():
|
|
| 60 |
|
| 61 |
# Read settings from environment variables
|
| 62 |
SQL_DRIVER = os.getenv("PYMATCH_SQL_DRIVER", "ODBC Driver 17 for SQL Server")
|
| 63 |
-
SQL_SERVER = os.getenv("PYMATCH_SQL_SERVER", r"
|
| 64 |
SQL_DB = os.getenv("PYMATCH_SQL_DB", "Py_Match")
|
| 65 |
SQL_TRUSTED = os.getenv("PYMATCH_SQL_TRUSTED", "yes").lower()
|
| 66 |
|
|
|
|
| 60 |
|
| 61 |
# Read settings from environment variables
|
| 62 |
SQL_DRIVER = os.getenv("PYMATCH_SQL_DRIVER", "ODBC Driver 17 for SQL Server")
|
| 63 |
+
SQL_SERVER = os.getenv("PYMATCH_SQL_SERVER", r"PYKARA")
|
| 64 |
SQL_DB = os.getenv("PYMATCH_SQL_DB", "Py_Match")
|
| 65 |
SQL_TRUSTED = os.getenv("PYMATCH_SQL_TRUSTED", "yes").lower()
|
| 66 |
|
routes/matching_routes.py
CHANGED
|
@@ -333,4 +333,101 @@ def unified_match(user_id=None):
|
|
| 333 |
"matches": matches_by_range,
|
| 334 |
"count": len(expectation_matches),
|
| 335 |
"mode": "expectation"
|
| 336 |
-
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
"matches": matches_by_range,
|
| 334 |
"count": len(expectation_matches),
|
| 335 |
"mode": "expectation"
|
| 336 |
+
})
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
@matching_bp.get("/compatibility-explanation")
|
| 340 |
+
def get_compatibility_explanation():
|
| 341 |
+
user_id = request.args.get("user_id", type=int)
|
| 342 |
+
target_user_id = request.args.get("target_user_id", type=int)
|
| 343 |
+
mode = request.args.get("mode", "expectation-only")
|
| 344 |
+
|
| 345 |
+
if not user_id or not target_user_id:
|
| 346 |
+
return jsonify({"error": "user_id and target_user_id are required"}), 400
|
| 347 |
+
|
| 348 |
+
try:
|
| 349 |
+
# TAB 1 β EXPECTATION ONLY (Rule-based)
|
| 350 |
+
if mode == "expectation-only":
|
| 351 |
+
exp_user = fetch_expectation_data(user_id)
|
| 352 |
+
profile_user = fetch_marriage_profile_data(target_user_id)
|
| 353 |
+
|
| 354 |
+
explanations = generate_expectation_explanation(exp_user, profile_user)
|
| 355 |
+
|
| 356 |
+
return jsonify({
|
| 357 |
+
"mode": "expectation-only",
|
| 358 |
+
"explanations": explanations,
|
| 359 |
+
"source": "expectation-fallback"
|
| 360 |
+
})
|
| 361 |
+
|
| 362 |
+
# TAB 2 β CHARACTER ONLY (LLM-ONLY)
|
| 363 |
+
elif mode == "character":
|
| 364 |
+
llm1 = LLMGeneratedQuestions.query.filter_by(user_id=user_id).first()
|
| 365 |
+
llm2 = LLMGeneratedQuestions.query.filter_by(user_id=target_user_id).first()
|
| 366 |
+
|
| 367 |
+
if not (llm1 and llm2):
|
| 368 |
+
return jsonify({
|
| 369 |
+
"mode": "character",
|
| 370 |
+
"explanations": [
|
| 371 |
+
"Character analysis unavailable - no personality data found for one or both users."
|
| 372 |
+
],
|
| 373 |
+
"source": "error"
|
| 374 |
+
})
|
| 375 |
+
|
| 376 |
+
u_vec = llm1.color_vec()
|
| 377 |
+
v_vec = llm2.color_vec()
|
| 378 |
+
|
| 379 |
+
print(f"π― Generating AI character analysis for users {user_id} and {target_user_id}...")
|
| 380 |
+
character_explanations = generate_character_llm_explanation(u_vec, v_vec)
|
| 381 |
+
|
| 382 |
+
return jsonify({
|
| 383 |
+
"mode": "character",
|
| 384 |
+
"explanations": character_explanations,
|
| 385 |
+
"source": "character-llm"
|
| 386 |
+
})
|
| 387 |
+
|
| 388 |
+
# TAB 3 β EXPECTATION + CHARACTER (Mixed)
|
| 389 |
+
elif mode == "expectation":
|
| 390 |
+
exp_user = fetch_expectation_data(user_id)
|
| 391 |
+
profile_user = fetch_marriage_profile_data(target_user_id)
|
| 392 |
+
|
| 393 |
+
expectation_part = generate_expectation_explanation(exp_user, profile_user)
|
| 394 |
+
|
| 395 |
+
llm1 = LLMGeneratedQuestions.query.filter_by(user_id=user_id).first()
|
| 396 |
+
llm2 = LLMGeneratedQuestions.query.filter_by(user_id=target_user_id).first()
|
| 397 |
+
|
| 398 |
+
character_explanations = []
|
| 399 |
+
if llm1 and llm2:
|
| 400 |
+
try:
|
| 401 |
+
u_vec = llm1.color_vec()
|
| 402 |
+
v_vec = llm2.color_vec()
|
| 403 |
+
character_explanations = generate_character_llm_explanation(u_vec, v_vec)
|
| 404 |
+
source_type = "character-llm"
|
| 405 |
+
except Exception as e:
|
| 406 |
+
print(f"π΄ LLM failed, using backend fallback: {e}")
|
| 407 |
+
character_explanations = generate_character_fallback_explanation(u_vec, v_vec)
|
| 408 |
+
source_type = "character-fallback"
|
| 409 |
+
else:
|
| 410 |
+
character_explanations = ["Character analysis unavailable for this user."]
|
| 411 |
+
source_type = "error"
|
| 412 |
+
|
| 413 |
+
final_output = expectation_part + ["", "π§ **AI Character Insights**"] + character_explanations
|
| 414 |
+
|
| 415 |
+
return jsonify({
|
| 416 |
+
"mode": "expectation",
|
| 417 |
+
"explanations": final_output,
|
| 418 |
+
"source": source_type
|
| 419 |
+
})
|
| 420 |
+
|
| 421 |
+
else:
|
| 422 |
+
return jsonify({"error": "Invalid mode"}), 400
|
| 423 |
+
|
| 424 |
+
except Exception as e:
|
| 425 |
+
print(f"π΄ Error in compatibility explanation: {e}")
|
| 426 |
+
return jsonify({
|
| 427 |
+
"explanations": [f"β Service temporarily unavailable: {str(e)}"],
|
| 428 |
+
"source": "error"
|
| 429 |
+
}), 500
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
|