pykara commited on
Commit
0165bee
Β·
1 Parent(s): 8737b49

backend fix

Browse files
Files changed (6) hide show
  1. app.py +139 -75
  2. config.py +1 -1
  3. faiss_service.py +132 -6
  4. llm_service.py +607 -334
  5. routes/auth_routes.py +1 -1
  6. routes/matching_routes.py +98 -1
app.py CHANGED
@@ -1,18 +1,45 @@
1
  APP_BUILD = "HF-BUILD-2025-12-15-01"
2
  print("βœ… RUNNING APP BUILD:", APP_BUILD, "FILE:", __file__)
3
- # app.py (HF-safe updated version)
 
4
  import os
5
  import datetime
6
  import traceback
 
7
  from flask import Flask, jsonify, request
8
  from flask_cors import CORS
9
 
 
 
 
 
10
  from config import (
11
- SQL_DRIVER, SQL_SERVER, SQL_DB, SQL_TRUSTED, SQL_USER, SQL_PASSWORD,
12
- SQL_PORT, SQL_ENCRYPT, SQL_TRUSTCERT, IS_HUGGING_FACE
 
 
 
 
 
 
 
 
 
13
  )
 
14
  from models import db
15
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def create_app():
18
  app = Flask(__name__)
@@ -28,7 +55,7 @@ def create_app():
28
  # ----------------------------
29
  @app.before_request
30
  def log_request_info():
31
- print(f"\n{'='*60}")
32
  print("πŸ“₯ INCOMING REQUEST:")
33
  print(f" Time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
34
  print(f" Method: {request.method}")
@@ -38,7 +65,7 @@ def create_app():
38
  if request.user_agent:
39
  print(f" User Agent: {request.user_agent.string[:80]}...")
40
  print(f" Referrer: {request.referrer}")
41
- print(f"{'='*60}")
42
 
43
  # ----------------------------
44
  # DB init
@@ -60,7 +87,7 @@ def create_app():
60
  import routes as routes_module
61
  print("βœ… DEBUG: Imported routes module")
62
 
63
- # Get blueprints safely (if a blueprint is not created due to import error, it may be None)
64
  candidates = [
65
  ("auth_bp", getattr(routes_module, "auth_bp", None), "/api"),
66
  ("profiles_bp", getattr(routes_module, "profiles_bp", None), None),
@@ -69,8 +96,10 @@ def create_app():
69
  ("llm_bp", getattr(routes_module, "llm_bp", None), None),
70
  ]
71
 
72
- print("βœ… DEBUG: Blueprint objects (None means failed):",
73
- [bp.name if bp else None for _, bp, _ in candidates])
 
 
74
 
75
  for name, bp, prefix in candidates:
76
  if bp is None:
@@ -95,92 +124,126 @@ def create_app():
95
  print(f"❌ DEBUG: Failed to import routes or register blueprints: {e}")
96
  traceback.print_exc()
97
 
98
- # ----------------------------
99
  # Debug endpoints (always available)
100
- # ----------------------------
101
- @app.get("/api/health")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  @app.get("/health")
 
103
  def health():
104
- # Keep this endpoint simple and safe (do not crash if optional imports fail)
105
- faiss_loaded = False
106
- knowledge_loaded = False
107
  llm_mode = "offline-fallback"
108
-
109
  try:
110
- from llm_service import CHAIN_BATCH
111
  if CHAIN_BATCH is not None:
112
  llm_mode = "openai"
113
  except Exception:
114
  pass
115
 
116
- try:
117
- from faiss_service import knowledge, TEXT_CHUNKS
118
- faiss_loaded = bool(TEXT_CHUNKS) and len(TEXT_CHUNKS) > 0
119
- knowledge_loaded = knowledge is not None and hasattr(knowledge, "indices") and len(knowledge.indices) > 0
120
- except Exception:
121
- pass
122
-
123
- return jsonify({
124
- "status": "ok",
125
- "huggingface": bool(IS_HUGGING_FACE),
126
- "llm": llm_mode,
127
- "has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
128
- "db": {"server": SQL_SERVER, "database": SQL_DB},
129
- "faiss_loaded": faiss_loaded,
130
- "knowledge_base_loaded": knowledge_loaded,
131
- "blueprints": blueprint_status
132
- })
133
-
134
- @app.get("/api/_routes")
135
- @app.get("/debug/routes")
136
- def list_routes():
137
- routes_list = []
138
- for rule in app.url_map.iter_rules():
139
- routes_list.append({
140
- "endpoint": rule.endpoint,
141
- "methods": sorted(list(rule.methods)),
142
- "rule": str(rule)
143
- })
144
 
145
- # Detect a common mistake: /api/api/...
146
- has_double_api = any(r["rule"].startswith("/api/api/") for r in routes_list)
 
 
 
 
 
147
 
148
- return jsonify({
149
- "count": len(routes_list),
150
- "has_double_api_prefix": has_double_api,
151
- "routes": sorted(routes_list, key=lambda x: x["rule"])
152
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
 
 
 
154
  @app.get("/")
155
  def home():
156
- return jsonify({
157
- "message": "Unified Py-Match Service (FAISS-enabled)",
158
- "try": [
159
- "GET /api/health",
160
- "GET /api/_routes",
161
- "POST /api/signup",
162
- "POST /api/login",
163
- "GET /api/questions/marriage",
164
- "GET /api/questions/existing-profile/marriage/<user_id>",
165
- "GET /api/expectation-questions",
166
- "GET /api/existing-preferences/<user_id>",
167
- ]
168
- })
 
 
 
 
 
 
 
 
169
 
170
- # ----------------------------
171
  # Error handlers
172
- # ----------------------------
173
  @app.errorhandler(404)
174
  def not_found(error):
175
  return jsonify({"error": "Endpoint not found", "path": request.path}), 404
176
 
177
  @app.errorhandler(405)
178
  def method_not_allowed(error):
179
- return jsonify({
180
- "error": "Method not allowed",
181
- "message": f"Method {request.method} not allowed for {request.path}",
182
- "allowed_methods": getattr(error, "valid_methods", [])
183
- }), 405
 
 
 
 
 
 
 
184
 
185
  @app.errorhandler(500)
186
  def internal_error(error):
@@ -189,10 +252,11 @@ def create_app():
189
  return app
190
 
191
 
192
- app = create_app()
193
-
194
  if __name__ == "__main__":
195
- print(f"\n{'='*60}")
 
 
196
  print("πŸš€ Flask server starting...")
197
- print(f"{'='*60}")
 
198
  app.run(host="0.0.0.0", port=5000, debug=True)
 
1
  APP_BUILD = "HF-BUILD-2025-12-15-01"
2
  print("βœ… RUNNING APP BUILD:", APP_BUILD, "FILE:", __file__)
3
+
4
+ # app.py (HF-safe + corrected health + debug routes)
5
  import os
6
  import datetime
7
  import traceback
8
+
9
  from flask import Flask, jsonify, request
10
  from flask_cors import CORS
11
 
12
+ # FAISS / knowledge
13
+ from faiss_service import FAISS_INDEX, TEXT_CHUNKS, HAS_FAISS, knowledge
14
+
15
+ # Config
16
  from config import (
17
+ SQL_DRIVER,
18
+ SQL_SERVER,
19
+ SQL_DB,
20
+ SQL_TRUSTED,
21
+ SQL_USER,
22
+ SQL_PASSWORD,
23
+ SQL_PORT,
24
+ SQL_ENCRYPT,
25
+ SQL_TRUSTCERT,
26
+ IS_HUGGING_FACE,
27
+ PROGRESS_TBL, # make sure this exists in config.py
28
  )
29
+
30
  from models import db
31
 
32
+ # LLM / chain imports (safe if module not present)
33
+ try:
34
+ from llm_service import CHAIN_BATCH
35
+ try:
36
+ from llm_service import llm_chain
37
+ except ImportError:
38
+ llm_chain = None
39
+ except ImportError:
40
+ CHAIN_BATCH = None
41
+ llm_chain = None
42
+
43
 
44
  def create_app():
45
  app = Flask(__name__)
 
55
  # ----------------------------
56
  @app.before_request
57
  def log_request_info():
58
+ print(f"\n{'=' * 60}")
59
  print("πŸ“₯ INCOMING REQUEST:")
60
  print(f" Time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
61
  print(f" Method: {request.method}")
 
65
  if request.user_agent:
66
  print(f" User Agent: {request.user_agent.string[:80]}...")
67
  print(f" Referrer: {request.referrer}")
68
+ print(f"{'=' * 60}")
69
 
70
  # ----------------------------
71
  # DB init
 
87
  import routes as routes_module
88
  print("βœ… DEBUG: Imported routes module")
89
 
90
+ # Get blueprints safely (None means missing)
91
  candidates = [
92
  ("auth_bp", getattr(routes_module, "auth_bp", None), "/api"),
93
  ("profiles_bp", getattr(routes_module, "profiles_bp", None), None),
 
96
  ("llm_bp", getattr(routes_module, "llm_bp", None), None),
97
  ]
98
 
99
+ print(
100
+ "βœ… DEBUG: Blueprint objects (None means failed):",
101
+ [bp.name if bp else None for _, bp, _ in candidates],
102
+ )
103
 
104
  for name, bp, prefix in candidates:
105
  if bp is None:
 
124
  print(f"❌ DEBUG: Failed to import routes or register blueprints: {e}")
125
  traceback.print_exc()
126
 
127
+ # ------------------------------------------------------------------
128
  # Debug endpoints (always available)
129
+ # ------------------------------------------------------------------
130
+ @app.get("/api/_routes")
131
+ @app.get("/debug/routes")
132
+ def list_routes():
133
+ routes_list = []
134
+ for rule in app.url_map.iter_rules():
135
+ routes_list.append(
136
+ {
137
+ "endpoint": rule.endpoint,
138
+ "methods": sorted(list(rule.methods)),
139
+ "rule": str(rule),
140
+ }
141
+ )
142
+
143
+ has_double_api = any(r["rule"].startswith("/api/api/") for r in routes_list)
144
+
145
+ return jsonify(
146
+ {
147
+ "count": len(routes_list),
148
+ "has_double_api_prefix": has_double_api,
149
+ "routes": sorted(routes_list, key=lambda x: x["rule"]),
150
+ }
151
+ )
152
+
153
+ # ------------------------------------------------------------------
154
+ # Health endpoint (both /health and /api/health to avoid breaking clients)
155
+ # ------------------------------------------------------------------
156
  @app.get("/health")
157
+ @app.get("/api/health")
158
  def health():
159
+ # LLM mode
 
 
160
  llm_mode = "offline-fallback"
 
161
  try:
 
162
  if CHAIN_BATCH is not None:
163
  llm_mode = "openai"
164
  except Exception:
165
  pass
166
 
167
+ # FAISS status
168
+ faiss_chunks = len(TEXT_CHUNKS) if TEXT_CHUNKS is not None else 0
169
+ faiss_loaded = bool(HAS_FAISS and FAISS_INDEX is not None and faiss_chunks > 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
+ # Knowledge base status
172
+ if knowledge is not None and hasattr(knowledge, "indices"):
173
+ knowledge_indices_count = len(getattr(knowledge, "indices", []))
174
+ knowledge_loaded = knowledge_indices_count > 0
175
+ else:
176
+ knowledge_indices_count = 0
177
+ knowledge_loaded = False
178
 
179
+ return jsonify(
180
+ {
181
+ "status": "ok",
182
+ "huggingface": bool(IS_HUGGING_FACE), # keep this line (as you requested)
183
+ "llm": llm_mode,
184
+ "has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
185
+ "db": {
186
+ "server": SQL_SERVER,
187
+ "database": SQL_DB,
188
+ "table": PROGRESS_TBL,
189
+ },
190
+ "faiss_available": HAS_FAISS,
191
+ "faiss_loaded": faiss_loaded,
192
+ "faiss_chunks": faiss_chunks,
193
+ "knowledge_base_loaded": knowledge_loaded,
194
+ "knowledge_indices": knowledge_indices_count,
195
+ "blueprints": blueprint_status,
196
+ }
197
+ )
198
 
199
+ # ------------------------------------------------------------------
200
+ # Home endpoint
201
+ # ------------------------------------------------------------------
202
  @app.get("/")
203
  def home():
204
+ return jsonify(
205
+ {
206
+ "message": "Unified Py-Match Service (FAISS-enabled)",
207
+ "try": [
208
+ "GET /health",
209
+ "GET /api/health",
210
+ "GET /api/_routes",
211
+ "GET /debug/routes",
212
+ "POST /api/signup",
213
+ "POST /api/login",
214
+ "GET /api/questions/marriage",
215
+ "GET /api/questions/existing-profile/marriage/<user_id>",
216
+ "GET /api/expectation-questions",
217
+ "GET /api/existing-preferences/<user_id>",
218
+ "POST /api/questions/submit-answers/<role>",
219
+ "POST /llm/start (body: { user_id, role, n_questions, batch_size })",
220
+ "POST /llm/next (body: { session_id, selected_color })",
221
+ "GET /api/match/<user_id> (query: ?role=<role>&limit=<num>)",
222
+ ],
223
+ }
224
+ )
225
 
226
+ # ------------------------------------------------------------------
227
  # Error handlers
228
+ # ------------------------------------------------------------------
229
  @app.errorhandler(404)
230
  def not_found(error):
231
  return jsonify({"error": "Endpoint not found", "path": request.path}), 404
232
 
233
  @app.errorhandler(405)
234
  def method_not_allowed(error):
235
+ return (
236
+ jsonify(
237
+ {
238
+ "error": "Method not allowed",
239
+ "message": f"Method {request.method} not allowed for {request.path}",
240
+ "allowed_methods": (
241
+ error.valid_methods if hasattr(error, "valid_methods") else []
242
+ ),
243
+ }
244
+ ),
245
+ 405,
246
+ )
247
 
248
  @app.errorhandler(500)
249
  def internal_error(error):
 
252
  return app
253
 
254
 
 
 
255
  if __name__ == "__main__":
256
+ app = create_app()
257
+
258
+ print(f"\n{'=' * 60}")
259
  print("πŸš€ Flask server starting...")
260
+ print(f"{'=' * 60}")
261
+
262
  app.run(host="0.0.0.0", port=5000, debug=True)
config.py CHANGED
@@ -21,7 +21,7 @@ if IS_HUGGING_FACE:
21
  DEFAULT_SQL_TRUSTED = "yes" # Use SQL authentication on Hugging Face
22
  else:
23
  # Local development configuration
24
- DEFAULT_SQL_SERVER = "localhost\sqlexpress"
25
  DEFAULT_SQL_DB = "Py_Match"
26
  DEFAULT_SQL_TRUSTED = "yes" # Use Windows authentication locally
27
 
 
21
  DEFAULT_SQL_TRUSTED = "yes" # Use SQL authentication on Hugging Face
22
  else:
23
  # Local development configuration
24
+ DEFAULT_SQL_SERVER = "PYKARA"
25
  DEFAULT_SQL_DB = "Py_Match"
26
  DEFAULT_SQL_TRUSTED = "yes" # Use Windows authentication locally
27
 
faiss_service.py CHANGED
@@ -4,7 +4,13 @@ import json
4
  import pickle
5
  import random
6
  from typing import Dict, List, Tuple, Optional
 
 
7
 
 
 
 
 
8
  # Try importing faiss
9
  try:
10
  import faiss
@@ -108,8 +114,104 @@ def try_load_chunks_from_disk(index_path: str) -> List[str]:
108
  print(f"Failed to load chunks from {c}:", e)
109
  return []
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  def load_faiss_index(index_path: str):
112
- global FAISS_INDEX, TEXT_CHUNKS
113
  if not HAS_FAISS:
114
  print("FAISS not installed. Skipping index load.")
115
  return
@@ -118,14 +220,19 @@ def load_faiss_index(index_path: str):
118
  return
119
  try:
120
  FAISS_INDEX = faiss.read_index(index_path)
121
- # try to load chunks from companion files
122
  TEXT_CHUNKS = try_load_chunks_from_disk(index_path)
123
- if not TEXT_CHUNKS:
124
- print("Warning: Faiss index loaded but no companion text chunks found.")
125
- print("Provide a companion .chunks.json or .chunks.pkl file with a list of text chunks.")
 
 
 
 
126
  except Exception as e:
127
  print("Failed to load faiss index:", e)
128
  FAISS_INDEX = None
 
 
129
 
130
  def get_nearest_context(query_emb: List[float] = None, k: int = 5, query_vector: Optional[List[float]] = None):
131
  """Return concatenated top-k chunks for a query."""
@@ -157,6 +264,22 @@ def get_faiss_context(k=3):
157
 
158
  # Initialize knowledge base only if FAISS is available
159
  knowledge = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  if HAS_FAISS:
161
  knowledge = KnowledgeSource()
162
  else:
@@ -165,4 +288,7 @@ else:
165
  class DummyKnowledge:
166
  def get_relevant_context(self, *args, **kwargs):
167
  return []
168
- knowledge = DummyKnowledge()
 
 
 
 
4
  import pickle
5
  import random
6
  from typing import Dict, List, Tuple, Optional
7
+ import re
8
+ from collections import defaultdict
9
 
10
+ import os
11
+
12
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
13
+ FAISS_INDEX_PATH = os.path.join(BASE_DIR, "faiss_index_file.index") # Direct path
14
  # Try importing faiss
15
  try:
16
  import faiss
 
114
  print(f"Failed to load chunks from {c}:", e)
115
  return []
116
 
117
+ # Add this to faiss_service.py after the imports
118
+
119
+
120
+
121
+ def extract_color_behaviors_from_chunks(chunks: List[str]) -> Dict[str, List[str]]:
122
+ """
123
+ Extract color-specific behaviors from book chunks
124
+ Returns: {"blue": [behaviors], "green": [behaviors], "red": [behaviors], "yellow": [behaviors]}
125
+ """
126
+ color_keywords = {
127
+ "blue": [
128
+ "analytical", "fact-based", "data", "research", "analyze", "logic",
129
+ "detail", "thorough", "precision", "evidence", "numbers", "verify",
130
+ "critical", "skeptical", "methodical", "systematic", "investigate",
131
+ "examine", "study", "proof", "accuracy", "rational", "reasoning"
132
+ ],
133
+ "green": [
134
+ "organized", "process", "systematic", "routine", "plan", "structure",
135
+ "reliable", "consistent", "predictable", "bureaucratic", "procedural",
136
+ "methodical", "step-by-step", "orderly", "structured", "traditional",
137
+ "stable", "secure", "dependable", "regulated", "formal", "order"
138
+ ],
139
+ "red": [
140
+ "decisive", "action-oriented", "direct", "results", "take charge",
141
+ "impulsive", "controlling", "dominant", "assertive", "leadership",
142
+ "quick", "immediate", "practical", "hands-on", "confrontational",
143
+ "bold", "forceful", "authoritative", "dynamic", "active", "energetic"
144
+ ],
145
+ "yellow": [
146
+ "creative", "big-picture", "visionary", "innovative", "ideas",
147
+ "unrealistic", "scattered", "enthusiastic", "optimistic", "inspiring",
148
+ "imaginative", "exploratory", "experimental", "spontaneous", "free-thinking",
149
+ "inventive", "original", "artistic", "expressive", "playful", "curious"
150
+ ]
151
+ }
152
+
153
+ color_behaviors = defaultdict(list)
154
+
155
+ for chunk in chunks:
156
+ sentences = re.split(r'[.!?]+', chunk)
157
+ for sentence in sentences:
158
+ sentence = sentence.strip()
159
+ if len(sentence.split()) < 5 or len(sentence.split()) > 25:
160
+ continue # Skip too short or too long sentences
161
+
162
+ sentence_lower = sentence.lower()
163
+
164
+ # Find which color this sentence describes
165
+ for color, keywords in color_keywords.items():
166
+ if any(keyword in sentence_lower for keyword in keywords):
167
+ # Clean and format the behavior
168
+ behavior = sentence.strip()
169
+ # Remove quotes if present
170
+ behavior = behavior.replace('"', '').replace("'", "")
171
+ # Capitalize first letter
172
+ if behavior and behavior[0].islower():
173
+ behavior = behavior[0].upper() + behavior[1:]
174
+
175
+ # Avoid duplicates
176
+ if behavior not in color_behaviors[color]:
177
+ color_behaviors[color].append(behavior)
178
+
179
+ break
180
+
181
+ return dict(color_behaviors)
182
+
183
+ def load_color_examples():
184
+ """
185
+ Load color examples from the book chunks
186
+ """
187
+ global COLOR_EXAMPLES
188
+
189
+ if not TEXT_CHUNKS:
190
+ print("No text chunks loaded - cannot extract color examples")
191
+ COLOR_EXAMPLES = None
192
+ return
193
+
194
+ try:
195
+ COLOR_EXAMPLES = extract_color_behaviors_from_chunks(TEXT_CHUNKS)
196
+ print(f"Loaded color examples from book: {', '.join([f'{color}: {len(examples)}' for color, examples in COLOR_EXAMPLES.items()])}")
197
+
198
+ # Debug: Show sample behaviors
199
+ print("\n=== SAMPLE COLOR EXAMPLES ===")
200
+ for color, examples in COLOR_EXAMPLES.items():
201
+ print(f"\n{color.upper()} (first 3 examples):")
202
+ for i, example in enumerate(examples[:3], 1):
203
+ print(f" {i}. {example}")
204
+ print("=============================\n")
205
+ except Exception as e:
206
+ print(f"Failed to extract color examples: {e}")
207
+ COLOR_EXAMPLES = None
208
+
209
+ # Initialize COLOR_EXAMPLES
210
+ COLOR_EXAMPLES = None
211
+
212
+ # Update load_faiss_index to also load color examples
213
  def load_faiss_index(index_path: str):
214
+ global FAISS_INDEX, TEXT_CHUNKS, COLOR_EXAMPLES
215
  if not HAS_FAISS:
216
  print("FAISS not installed. Skipping index load.")
217
  return
 
220
  return
221
  try:
222
  FAISS_INDEX = faiss.read_index(index_path)
 
223
  TEXT_CHUNKS = try_load_chunks_from_disk(index_path)
224
+ if TEXT_CHUNKS:
225
+ print(f"Loaded {len(TEXT_CHUNKS)} text chunks")
226
+ # Extract color examples from chunks
227
+ load_color_examples()
228
+ else:
229
+ print("Warning: No text chunks found.")
230
+ COLOR_EXAMPLES = None
231
  except Exception as e:
232
  print("Failed to load faiss index:", e)
233
  FAISS_INDEX = None
234
+ COLOR_EXAMPLES = None
235
+
236
 
237
  def get_nearest_context(query_emb: List[float] = None, k: int = 5, query_vector: Optional[List[float]] = None):
238
  """Return concatenated top-k chunks for a query."""
 
264
 
265
  # Initialize knowledge base only if FAISS is available
266
  knowledge = None
267
+
268
+ # --- Auto-load main FAISS index on import ---
269
+ if HAS_FAISS:
270
+ try:
271
+ if os.path.exists(FAISS_INDEX_PATH):
272
+ print(f"[faiss_service] Loading FAISS index from: {FAISS_INDEX_PATH}")
273
+ load_faiss_index(FAISS_INDEX_PATH)
274
+ else:
275
+ print(f"[faiss_service] FAISS index file NOT found at: {FAISS_INDEX_PATH}")
276
+ except Exception as e:
277
+ print(f"[faiss_service] Error while loading FAISS index: {e}")
278
+ else:
279
+ print("[faiss_service] FAISS not installed, index will not be loaded.")
280
+
281
+
282
+
283
  if HAS_FAISS:
284
  knowledge = KnowledgeSource()
285
  else:
 
288
  class DummyKnowledge:
289
  def get_relevant_context(self, *args, **kwargs):
290
  return []
291
+ knowledge = DummyKnowledge()
292
+
293
+
294
+
llm_service.py CHANGED
@@ -1,4 +1,3 @@
1
- # llm_service.py
2
  import pyodbc
3
  import os
4
  import json
@@ -22,16 +21,21 @@ except Exception:
22
  HAS_LLM_STACK = False
23
  HAS_LLM = False
24
 
 
25
  class Option(BaseModel):
26
  text: str
27
  color: str
28
 
 
29
  class QAItem(BaseModel):
30
  question: str
31
  options: List[Option] = Field(min_items=4, max_items=4)
32
 
 
33
  class BatchQA(BaseModel):
34
  items: List[QAItem] = Field(..., min_items=1)
 
 
35
  SYSTEM_PROMPT = (
36
  "You write marriage compatibility assessment questions that reveal four personality colors through forced choices:\n"
37
  "- blue=analytical, fact-based (positive: thorough, precise | negative: overly critical, data-obsessed)\n"
@@ -65,8 +69,7 @@ SYSTEM_PROMPT = (
65
 
66
  USER_PROMPT_BATCH = (
67
  "Context (from Surrounded by Idiots or other corpus):\n{context}\n\n"
68
- "Question Type: {question_type}\n\n" # Add this line
69
-
70
  "User Profile (Current Background):\n"
71
  "- Education: {education}\n"
72
  "- Employment: {employment}\n"
@@ -89,12 +92,10 @@ USER_PROMPT_BATCH = (
89
  "Themes (array of short strings): {themes_json}\n"
90
  "Previously asked questions: {previous_questions}\n\n"
91
  "{format_instructions}\n\n"
92
-
93
  "Generate {question_type} questions:\n"
94
  "- If QUESTION TYPE is 'profile': Generate 5 questions using ONLY profile data (education, employment, hobbies, family background, current lifestyle)\n"
95
  "- If QUESTION TYPE is 'expectation': Generate 5 questions using ONLY expectation data (conflict style, financial preferences, values, deal breakers)\n"
96
  "- If QUESTION TYPE is 'character': Generate 10 questions about CURRENT behavior in various life situations\n\n"
97
-
98
  "CRITICAL RULES:\n"
99
  "1) DO NOT use prefixes like 'Based on your profile' or 'Considering your expectations'\n"
100
  "2) Questions should be natural and flow conversationally\n"
@@ -113,7 +114,7 @@ CHAIN_BATCH = None
113
  if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
114
  try:
115
  PARSER_BATCH = PydanticOutputParser(pydantic_object=BatchQA)
116
-
117
  def build_batch_chain():
118
  llm = ChatOpenAI(
119
  model="gpt-4o-mini",
@@ -122,10 +123,12 @@ if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
122
  timeout=30,
123
  model_kwargs={"response_format": {"type": "json_object"}},
124
  )
125
- prompt = ChatPromptTemplate.from_messages([
126
- ("system", SYSTEM_PROMPT),
127
- ("user", USER_PROMPT_BATCH),
128
- ])
 
 
129
  return prompt | llm | PARSER_BATCH
130
 
131
  CHAIN_BATCH = build_batch_chain()
@@ -133,50 +136,88 @@ if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
133
  print("Failed to build CHAIN_BATCH:", e)
134
  CHAIN_BATCH = None
135
 
 
136
  def ensure_valid_colors(options: List[Dict]) -> List[Dict]:
137
  seen, fixed = set(), []
138
  defaults = {
139
- "blue": "Verify facts and numbers",
140
- "green": "Outline a clear process",
141
- "red": "Coordinate people and act",
142
- "yellow": "Propose a fresh idea",
143
  }
144
  for o in options:
145
  c = str(o.get("color", "")).lower()
146
  t = str(o.get("text", "")).strip()
147
  if c in COLOR_KEYS and c not in seen and t:
148
- seen.add(c); fixed.append({"text": t[:80], "color": c})
 
149
  for c in COLOR_KEYS:
150
  if c not in seen:
151
  fixed.append({"text": defaults[c], "color": c})
152
  return fixed[:4]
153
 
 
154
  def summarize_profile(profile: Dict) -> Dict:
155
  """Extract all non-PII columns from Marriage table for LLM context"""
156
  out: Dict = {}
157
-
158
  # All columns from Marriage table (excluding PII where possible)
159
  marriage_columns = [
160
- "user_id", "full_name", "gender", "current_city", "marital_status",
161
- "education_level", "employment_status", "number_of_siblings", "family_type",
162
- "hobbies_interests", "conflict_approach", "financial_style", "income_range",
163
- "relocation_willingness", "height", "skin_tone", "languages_spoken", "country",
164
- "blood_group", "religion", "dual_citizenship", "siblings_position",
165
- "parents_living_status", "live_with_parents", "support_parents_financially",
166
- "family_communication_frequency", "food_preference", "smoking_habit",
167
- "alcohol_habit", "daily_routine", "fitness_level", "own_pets", "travel_preference",
168
- "relaxation_mode", "job_role", "work_experience_years", "career_aspirations",
169
- "field_of_study", "remark", "children_timeline", "open_to_adoption",
170
- "deal_breakers", "other_non_negotiables", "health_constraints", "live_with_inlaws"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  ]
172
-
173
  for col in marriage_columns:
174
  v = profile.get(col)
175
  if v not in (None, "", []):
176
  out[col] = v
177
-
178
  return out
179
 
 
180
  def offline_generate_batch(themes: List[str], state: Dict, context: str = "") -> List[Dict]:
181
  prof = state.get("profile", {}) or {}
182
  name = prof.get("full_name") or "Partner"
@@ -206,7 +247,7 @@ def offline_generate_batch(themes: List[str], state: Dict, context: str = "") ->
206
 
207
  # incorporate small bit from context if available (first 120 chars)
208
  if context:
209
- ctx_snip = context.replace('\n', ' ')[:120]
210
  q = f"{q} (Note: {ctx_snip})"
211
 
212
  # Keep concise
@@ -214,161 +255,191 @@ def offline_generate_batch(themes: List[str], state: Dict, context: str = "") ->
214
  q = " ".join(q.split()[:20])
215
 
216
  opts = [
217
- {"text": "Check data and facts", "color": "blue"},
218
- {"text": "Draft a step-by-step plan", "color": "green"},
219
- {"text": "Align people and act", "color": "red"},
220
- {"text": "Brainstorm bold ideas", "color": "yellow"},
221
  ]
222
  random.shuffle(opts)
223
  items.append({"question": q, "options": opts, "source": "fallback"})
224
  return items
225
 
226
 
227
- def generate_category_specific_options(question_type: str, question_text: str, profile_data: Dict = None, expectation_data: Dict = None) -> List[Dict]:
228
- """
229
- Generate options tailored to the question category and content
230
- """
 
 
 
231
  question_lower = question_text.lower()
232
-
233
  # Extract key themes from question for better contextualization
234
  themes_in_question = []
235
- for theme in ["learning", "problem", "conflict", "money", "family", "work", "social", "stress", "decision", "plan"]:
 
 
 
 
 
 
 
 
 
 
 
236
  if theme in question_lower:
237
  themes_in_question.append(theme)
238
-
239
- # Default option templates for each color
240
  base_options = {
241
  "blue": {
242
- "profile": "Research thoroughly and analyze all available data",
243
- "expectation": "Gather detailed information before forming an opinion",
244
- "character": "Analyze the situation carefully with facts and logic"
245
  },
246
  "green": {
247
- "profile": "Follow a structured, step-by-step approach",
248
- "expectation": "Establish clear rules and procedures",
249
- "character": "Create an organized plan and stick to it"
250
  },
251
  "red": {
252
- "profile": "Take decisive action to address the situation",
253
- "expectation": "Take charge and make things happen quickly",
254
- "character": "Act immediately and coordinate people involved"
255
  },
256
  "yellow": {
257
- "profile": "Explore creative possibilities and new approaches",
258
- "expectation": "Consider innovative solutions and future potential",
259
- "character": "Brainstorm creative ideas and possibilities"
260
- }
261
  }
262
-
263
- # Contextual variations based on question themes
264
  contextual_variations = {
265
  "learning": {
266
- "blue": "Study methodically and verify all information",
267
- "green": "Follow the curriculum in an organized manner",
268
- "red": "Jump into practical application immediately",
269
- "yellow": "Explore unconventional learning methods"
270
  },
271
  "problem": {
272
- "blue": "Analyze root causes with data",
273
- "green": "Systematically troubleshoot each component",
274
- "red": "Take immediate corrective action",
275
- "yellow": "Find innovative workarounds"
276
  },
277
  "conflict": {
278
- "blue": "Analyze perspectives logically",
279
- "green": "Establish fair mediation process",
280
- "red": "Address it directly and decisively",
281
- "yellow": "Find creative compromise"
282
  },
283
  "money": {
284
- "blue": "Analyze financial data thoroughly",
285
- "green": "Budget systematically and track expenses",
286
- "red": "Make decisive investment choices",
287
- "yellow": "Explore unconventional earning opportunities"
288
  },
289
  "family": {
290
- "blue": "Analyze family dynamics logically",
291
- "green": "Maintain family traditions and routines",
292
- "red": "Take leadership in family matters",
293
- "yellow": "Introduce new family activities"
294
- }
295
  }
296
-
297
  # Start with base options for the category
298
  options = []
299
  for color in COLOR_KEYS:
300
  base_text = base_options[color][question_type]
301
-
302
  # Add contextual variation if theme matches
303
  for theme, variations in contextual_variations.items():
304
  if theme in themes_in_question:
305
  base_text = variations[color]
306
  break
307
-
308
- # Add shadow/negative aspects for realism
309
  shadow_aspects = {
310
  "blue": {
311
- "profile": " (but can get stuck in analysis)",
312
- "expectation": " (but may overanalyze)",
313
- "character": " (but can be overly critical)"
314
  },
315
  "green": {
316
- "profile": " (but can be too rigid)",
317
- "expectation": " (but may create bureaucracy)",
318
- "character": " (but can resist change)"
319
  },
320
  "red": {
321
- "profile": " (but can be impulsive)",
322
- "expectation": " (but may be controlling)",
323
- "character": " (but can overlook details)"
324
  },
325
  "yellow": {
326
- "profile": " (but can be unrealistic)",
327
- "expectation": " (but may lack follow-through)",
328
- "character": " (but can be scattered)"
329
- }
330
  }
331
-
332
  # Only add shadow aspects occasionally (30% chance) for variety
333
  if random.random() < 0.3:
334
  shadow = shadow_aspects[color][question_type]
335
- # Ensure we don't exceed word limit
336
  if len(base_text.split()) + len(shadow.split()) <= 15:
337
  base_text += shadow
338
-
339
- options.append({
340
- "text": base_text[:80], # Limit length
341
- "color": color
342
- })
343
-
344
- return options
345
 
 
 
 
 
 
 
 
 
346
 
347
 
 
 
 
 
 
348
 
 
 
 
 
 
 
349
 
350
 
351
- def generate_batch_questions(themes: List[str], state: Dict, context: str = "", previous_questions: List[str] = None) -> List[Dict]:
 
 
 
 
 
352
  # Extract ALL data from Marriage table
353
  profile = state.get("profile", {})
354
  user_id = profile.get("user_id")
355
-
356
  try:
357
  from database import fetch_expectation_data
 
358
  expectation_data = fetch_expectation_data(user_id) if user_id else {}
359
  except ImportError:
360
  expectation_data = {}
361
-
362
  # Extract ALL profile data from Marriage table
363
  profile_data = {
364
  # Personal Information
365
  "full_name": profile.get("full_name", "Not specified"),
366
-
367
  "gender": profile.get("gender", "Not specified"),
368
  "current_city": profile.get("current_city", "Not specified"),
369
  "country": profile.get("country", "Not specified"),
370
  "marital_status": profile.get("marital_status", "Not specified"),
371
-
372
  # Education & Career
373
  "education_level": profile.get("education_level", "Not specified"),
374
  "employment_status": profile.get("employment_status", "Not specified"),
@@ -377,23 +448,24 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
377
  "career_aspirations": profile.get("career_aspirations", "Not specified"),
378
  "field_of_study": profile.get("field_of_study", "Not specified"),
379
  "income_range": profile.get("income_range", "Not specified"),
380
-
381
  # Family & Background
382
  "number_of_siblings": profile.get("number_of_siblings", "Not specified"),
383
  "family_type": profile.get("family_type", "Not specified"),
384
  "siblings_position": profile.get("siblings_position", "Not specified"),
385
  "parents_living_status": profile.get("parents_living_status", "Not specified"),
386
  "live_with_parents": profile.get("live_with_parents", "Not specified"),
387
- "support_parents_financially": profile.get("support_parents_financially", "Not specified"),
388
- "family_communication_frequency": profile.get("family_communication_frequency", "Not specified"),
389
-
 
 
 
390
  # Physical & Health
391
  "height": profile.get("height", "Not specified"),
392
  "skin_tone": profile.get("skin_tone", "Not specified"),
393
  "blood_group": profile.get("blood_group", "Not specified"),
394
  "health_constraints": profile.get("health_constraints", "Not specified"),
395
  "fitness_level": profile.get("fitness_level", "Not specified"),
396
-
397
  # Lifestyle & Habits
398
  "hobbies_interests": str(profile.get("hobbies_interests", "Not specified")),
399
  "conflict_approach": profile.get("conflict_approach", "Not specified"),
@@ -405,26 +477,22 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
405
  "own_pets": profile.get("own_pets", "Not specified"),
406
  "travel_preference": profile.get("travel_preference", "Not specified"),
407
  "relaxation_mode": profile.get("relaxation_mode", "Not specified"),
408
-
409
  # Languages & Relocation
410
  "languages_spoken": profile.get("languages_spoken", "Not specified"),
411
  "relocation_willingness": profile.get("relocation_willingness", "Not specified"),
412
-
413
  # Religion & Citizenship
414
  "religion": profile.get("religion", "Not specified"),
415
  "dual_citizenship": profile.get("dual_citizenship", "Not specified"),
416
-
417
  # Relationship Preferences
418
  "children_timeline": profile.get("children_timeline", "Not specified"),
419
  "open_to_adoption": profile.get("open_to_adoption", "Not specified"),
420
  "deal_breakers": profile.get("deal_breakers", "Not specified"),
421
  "other_non_negotiables": profile.get("other_non_negotiables", "Not specified"),
422
  "live_with_inlaws": profile.get("live_with_inlaws", "Not specified"),
423
-
424
  # Additional Info
425
  "remark": profile.get("remark", "Not specified"),
426
  }
427
-
428
  # Extract ALL expectation data from ExpectationResponse table
429
  expectation_data_dict = {
430
  # Basic Preferences
@@ -433,75 +501,108 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
433
  "pref_current_city": expectation_data.get("pref_current_city", "Not specified"),
434
  "pref_countries": expectation_data.get("pref_countries", "Not specified"),
435
  "pref_languages": expectation_data.get("pref_languages", "Not specified"),
436
- "pref_education_level": expectation_data.get("pref_education_level", "Not specified"),
437
- "pref_employment_status": expectation_data.get("pref_employment_status", "Not specified"),
438
-
 
 
 
439
  # Health & Lifestyle
440
  "health_constraints": expectation_data.get("health_constraints", "Not specified"),
441
  "pref_diet": expectation_data.get("pref_diet", "Not specified"),
442
  "accept_smoking": expectation_data.get("accept_smoking", "Not specified"),
443
  "accept_alcohol": expectation_data.get("accept_alcohol", "Not specified"),
444
  "pref_fitness": expectation_data.get("pref_fitness", "Not specified"),
445
-
446
  # Family & Living
447
  "pref_family_type": expectation_data.get("pref_family_type", "Not specified"),
448
  "live_with_inlaws": expectation_data.get("live_with_inlaws", "Not specified"),
449
  "children_timeline": expectation_data.get("children_timeline", "Not specified"),
450
  "open_to_adoption": expectation_data.get("open_to_adoption", "Not specified"),
451
- "pref_live_with_parents": expectation_data.get("pref_live_with_parents", "Not specified"),
452
- "financial_support_to_parents": expectation_data.get("financial_support_to_parents", "Not specified"),
453
-
 
 
 
454
  # Conflict & Finance
455
- "pref_conflict_approach": expectation_data.get("pref_conflict_approach", "Not specified"),
456
- "pref_financial_style": expectation_data.get("pref_financial_style", "Not specified"),
 
 
 
 
457
  "pref_income_range": expectation_data.get("pref_income_range", "Not specified"),
458
-
459
  # Values & Compatibility
460
- "religion_alignment": expectation_data.get("religion_alignment", "Not specified"),
461
- "pref_shared_hobbies": expectation_data.get("pref_shared_hobbies", "Not specified"),
 
 
 
 
462
  "travel_pref": expectation_data.get("travel_pref", "Not specified"),
463
  "pet_pref": expectation_data.get("pet_pref", "Not specified"),
464
-
465
  # Career & Relocation
466
- "pref_partner_relocation": expectation_data.get("pref_partner_relocation", "Not specified"),
467
- "pref_career_aspirations": expectation_data.get("pref_career_aspirations", "Not specified"),
468
-
 
 
 
469
  # Additional Preferences
470
  "marital_status": expectation_data.get("marital_status", "Not specified"),
471
  "skin_tone": expectation_data.get("skin_tone", "Not specified"),
472
  "daily_routine": expectation_data.get("daily_routine", "Not specified"),
473
- "family_communication_frequency": expectation_data.get("family_communication_frequency", "Not specified"),
 
 
474
  "relaxation_mode": expectation_data.get("relaxation_mode", "Not specified"),
475
-
476
  # Non-negotiables
477
  "deal_breakers": expectation_data.get("deal_breakers", "Not specified"),
478
- "other_non_negotiables": expectation_data.get("other_non_negotiables", "Not specified"),
479
-
 
480
  # Summary
481
  "expectation_summary": expectation_data.get("expectation_summary", "Not specified"),
482
  "_mandatory_fields": expectation_data.get("_mandatory_fields", "Not specified"),
483
  }
484
-
 
 
 
 
 
 
 
 
 
 
485
  if CHAIN_BATCH is not None and PARSER_BATCH is not None:
486
  try:
487
  items: List[Dict] = []
488
-
489
  # 1. PROFILE-BASED QUESTIONS (5 questions) - Using ALL Marriage table columns
490
  profile_prompt = {
491
  "state": json.dumps(state, ensure_ascii=False),
492
  "themes_json": json.dumps(["profile"] * 5, ensure_ascii=False),
493
- "previous_questions": json.dumps(previous_questions or [], ensure_ascii=False),
 
 
494
  "format_instructions": PARSER_BATCH.get_format_instructions(),
495
  "context": "Generate 5 PROFILE-BASED questions using ALL available user background information.",
496
  "question_type": "profile",
497
-
498
  # Use ALL profile data
499
  "education": f"{profile_data['education_level']} | {profile_data['field_of_study']}",
500
  "employment": f"{profile_data['employment_status']} | {profile_data['job_role']} ({profile_data['work_experience_years']} years)",
501
- "hobbies": profile_data['hobbies_interests'],
502
  "family_type": f"{profile_data['family_type']} | Siblings: {profile_data['number_of_siblings']} | Position: {profile_data['siblings_position']}",
503
- "current_lifestyle": f"City: {profile_data['current_city']}, {profile_data['country']} | Height: {profile_data['height']} | Languages: {profile_data['languages_spoken']} | Religion: {profile_data['religion']} | Fitness: {profile_data['fitness_level']} | Diet: {profile_data['food_preference']} | Habits: Smoking: {profile_data['smoking_habit']}, Alcohol: {profile_data['alcohol_habit']}",
504
-
 
 
 
 
 
 
505
  # Expectation data marked as irrelevant
506
  "conflict_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
507
  "financial_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
@@ -514,127 +615,211 @@ def generate_batch_questions(themes: List[str], state: Dict, context: str = "",
514
  "ambition_pref": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
515
  "deal_breakers": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
516
  }
517
-
518
  result = CHAIN_BATCH.invoke(profile_prompt)
519
  profile_items = get_items_from_result(result)
520
-
521
  for qa in profile_items[:5]:
522
  out = qa.dict() if hasattr(qa, "dict") else dict(qa)
523
- out["options"] = generate_category_specific_options(
524
- "profile",
525
- out.get("question", ""),
526
- profile_data,
527
- None
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  )
529
- out["source"] = "llm_profile"
530
  out["question_type"] = "profile"
531
  random.shuffle(out["options"])
532
  items.append(out)
533
-
534
  # 2. EXPECTATION-BASED QUESTIONS (5 questions) - Using ALL ExpectationResponse columns
535
  expectation_prompt = {
536
  "state": json.dumps(state, ensure_ascii=False),
537
  "themes_json": json.dumps(["expectation"] * 5, ensure_ascii=False),
538
- "previous_questions": json.dumps([q["question"] for q in items] + (previous_questions or []), ensure_ascii=False),
 
 
 
539
  "format_instructions": PARSER_BATCH.get_format_instructions(),
540
  "context": "Generate 5 EXPECTATION-BASED questions using ALL relationship preferences and expectations.",
541
  "question_type": "expectation",
542
-
543
  # Minimal profile context
544
  "education": "Background context only",
545
  "employment": "Background context only",
546
  "hobbies": "Background context only",
547
  "family_type": "Background context only",
548
  "current_lifestyle": "General context",
549
-
550
  # Use ALL expectation data
551
  "conflict_style": f"{expectation_data_dict['pref_conflict_approach']}",
552
- "financial_style": f"{expectation_data_dict['pref_financial_style']} | Income: {expectation_data_dict['pref_income_range']}",
553
- "income_range": expectation_data_dict['pref_income_range'],
554
- "relocation_willingness": f"{expectation_data_dict['pref_partner_relocation']}",
555
- "family_values": f"{expectation_data_dict['pref_family_type']} | Live with in-laws: {expectation_data_dict['live_with_inlaws']} | Children timeline: {expectation_data_dict['children_timeline']}",
556
- "core_values": f"Religion: {expectation_data_dict['religion_alignment']} | Deal breakers: {expectation_data_dict['deal_breakers']}",
557
- "lifestyle_pref": f"Fitness: {expectation_data_dict['pref_fitness']} | Diet: {expectation_data_dict['pref_diet']} | Daily routine: {expectation_data_dict['daily_routine']}",
558
- "social_pref": f"Hobbies: {expectation_data_dict['pref_shared_hobbies']} | Travel: {expectation_data_dict['travel_pref']} | Pets: {expectation_data_dict['pet_pref']}",
559
- "ambition_pref": f"Career: {expectation_data_dict['pref_career_aspirations']} | Education: {expectation_data_dict['pref_education_level']}",
560
- "deal_breakers": f"{expectation_data_dict['deal_breakers']} | Other non-negotiables: {expectation_data_dict['other_non_negotiables']}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  }
562
-
563
  result = CHAIN_BATCH.invoke(expectation_prompt)
564
  expectation_items = get_items_from_result(result)
565
-
566
  for qa in expectation_items[:5]:
567
  out = qa.dict() if hasattr(qa, "dict") else dict(qa)
568
- out["options"] = generate_category_specific_options(
569
- "expectation",
570
- out.get("question", ""),
571
- None,
572
- expectation_data_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  )
574
- out["source"] = "llm_expectation"
575
  out["question_type"] = "expectation"
576
  random.shuffle(out["options"])
577
  items.append(out)
578
-
579
  # 3. CHARACTER-BASED QUESTIONS (10 questions) - Using data from BOTH tables
580
  character_prompt = {
581
  "state": json.dumps(state, ensure_ascii=False),
582
  "themes_json": json.dumps(themes[:10], ensure_ascii=False),
583
- "previous_questions": json.dumps([q["question"] for q in items] + (previous_questions or []), ensure_ascii=False),
 
 
 
584
  "format_instructions": PARSER_BATCH.get_format_instructions(),
585
- "context": context[:2000] + "\n\nGenerate 10 CHARACTER-BASED questions using ALL available data.",
 
586
  "question_type": "character",
587
-
588
  # All data from Marriage table
589
- "education": profile_data['education_level'],
590
- "employment": profile_data['employment_status'],
591
- "hobbies": profile_data['hobbies_interests'],
592
- "family_type": profile_data['family_type'],
593
- "current_lifestyle": f"{profile_data['current_city']}, {profile_data['country']} | {profile_data['daily_routine']} | Relaxation: {profile_data['relaxation_mode']}",
594
-
 
 
595
  # All data from ExpectationResponse table
596
- "conflict_style": expectation_data_dict['pref_conflict_approach'],
597
- "financial_style": expectation_data_dict['pref_financial_style'],
598
- "income_range": expectation_data_dict['pref_income_range'],
599
- "relocation_willingness": expectation_data_dict['pref_partner_relocation'],
600
- "family_values": expectation_data_dict['pref_family_type'],
601
- "core_values": expectation_data_dict['religion_alignment'],
602
- "lifestyle_pref": expectation_data_dict['pref_fitness'],
603
- "social_pref": expectation_data_dict['pref_shared_hobbies'],
604
- "ambition_pref": expectation_data_dict['pref_career_aspirations'],
605
- "deal_breakers": expectation_data_dict['deal_breakers'],
 
 
606
  }
607
-
608
  result = CHAIN_BATCH.invoke(character_prompt)
609
  character_items = get_items_from_result(result)
610
-
611
  for qa in character_items[:10]:
612
  out = qa.dict() if hasattr(qa, "dict") else dict(qa)
613
- out["options"] = generate_category_specific_options(
614
- "character",
615
- out.get("question", ""),
616
- profile_data,
617
- expectation_data_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
  )
619
- out["source"] = "llm_character"
620
  out["question_type"] = "character"
621
  random.shuffle(out["options"])
622
  items.append(out)
623
-
624
  # Verify we have exactly 20 questions
625
  if len(items) == 20:
626
  return items[:20]
627
  else:
628
- # If LLM didn't generate enough, fill with fallback
629
- return fill_missing_questions(items, themes, state, profile_data, expectation_data_dict, context)
630
-
 
 
631
  except Exception as e:
632
  print("LLM batch generation failed:", e)
633
- return generate_fallback_with_distribution(themes, state, profile_data, expectation_data_dict, context)
 
 
634
  else:
635
- return generate_fallback_with_distribution(themes, state, profile_data, expectation_data_dict, context)
636
-
637
-
638
 
639
 
640
  def get_items_from_result(result):
@@ -646,92 +831,125 @@ def get_items_from_result(result):
646
  else:
647
  return []
648
 
649
- def fill_missing_questions(current_items: List[Dict], themes: List[str], state: Dict,
650
- profile_data: Dict, expectation_data: Dict, context: str = "") -> List[Dict]:
 
 
 
 
 
 
 
651
  """Fill missing questions to reach 20 total"""
652
  items = current_items.copy()
653
-
654
  # Count current distribution
655
  profile_count = sum(1 for q in items if q.get("question_type") == "profile")
656
  expectation_count = sum(1 for q in items if q.get("question_type") == "expectation")
657
  character_count = sum(1 for q in items if q.get("question_type") == "character")
658
-
659
  # Fill profile questions if needed
660
  while profile_count < 5:
661
  profile_q = generate_profile_question(state, profile_data)
662
  items.append(profile_q)
663
  profile_count += 1
664
-
665
  # Fill expectation questions if needed
666
  while expectation_count < 5:
667
  expectation_q = generate_expectation_question(state, expectation_data)
668
  items.append(expectation_q)
669
  expectation_count += 1
670
-
671
  # Fill character questions if needed
672
  while character_count < 10:
673
  theme = themes[character_count % len(themes)] if themes else "daily situation"
674
  character_q = generate_character_question(theme, state)
675
  items.append(character_q)
676
  character_count += 1
677
-
678
  return items[:20]
679
 
 
680
  def generate_profile_question(state: Dict, profile_data: Dict) -> Dict:
681
  """Generate a single profile question"""
682
  prof = state.get("profile", {})
683
  name = prof.get("full_name") or "Partner"
684
-
685
  profile_topics = [
686
- ("education", f"How does your educational background shape how you approach complex information?"),
687
- ("employment", f"What methods from your professional life do you apply to personal challenges?"),
688
- ("hobbies", f"How do your personal interests influence your approach to new experiences?"),
689
- ("family", f"What communication patterns from your family background feel most natural to you?"),
690
- ("background", f"How does your personal history affect your current decision-making style?")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
  ]
692
-
693
- topic_idx = len([q for q in state.get("history", []) if q.get("question_type") == "profile"])
 
 
694
  if topic_idx >= len(profile_topics):
695
  topic_idx = 0
696
-
697
  topic, question = profile_topics[topic_idx]
698
-
699
  # Generate contextual options
700
  options = generate_category_specific_options("profile", question, profile_data, None)
701
  random.shuffle(options)
702
-
703
  return {
704
  "question": question,
705
  "options": options,
706
  "source": "fallback_profile",
707
- "question_type": "profile"
708
  }
709
 
 
710
  def generate_expectation_question(state: Dict, expectation_data: Dict) -> Dict:
711
  """Generate a single expectation question"""
712
  expectation_topics = [
713
- ("conflict", f"When tensions arise, what's your instinctive approach to resolution?"),
714
- ("values", f"How do your core principles guide your everyday choices?"),
715
- ("finance", f"What mindset drives your approach to shared financial decisions?"),
716
- ("balance", f"How do you navigate between personal needs and relationship commitments?"),
717
- ("dealbreakers", f"What boundaries are non-negotiable for you in close relationships?")
 
 
 
718
  ]
719
-
720
- topic_idx = len([q for q in state.get("history", []) if q.get("question_type") == "expectation"])
 
 
721
  if topic_idx >= len(expectation_topics):
722
  topic_idx = 0
723
-
724
  topic, question = expectation_topics[topic_idx]
725
-
726
  # Generate contextual options
727
- options = generate_category_specific_options("expectation", question, None, expectation_data)
 
 
728
  random.shuffle(options)
729
-
730
  return {
731
  "question": question,
732
  "options": options,
733
  "source": "fallback_expectation",
734
- "question_type": "expectation"
735
  }
736
 
737
 
@@ -739,108 +957,127 @@ def generate_character_question(theme: str, state: Dict) -> Dict:
739
  """Generate a single character question"""
740
  prof = state.get("profile", {})
741
  name = prof.get("full_name") or "Partner"
742
-
743
  short_theme = theme.split(" around ")[-1].strip()[:50]
744
- question = f"When {short_theme}, what's your typical response?"
745
-
746
  # Generate contextual options
747
  options = generate_category_specific_options("character", question, None, None)
748
  random.shuffle(options)
749
-
750
  return {
751
  "question": question,
752
  "options": options,
753
  "source": "fallback_character",
754
- "question_type": "character"
755
  }
756
 
757
 
758
-
759
-
760
- def generate_fallback_with_distribution(themes: List[str], state: Dict, profile_data: Dict, expectation_data: Dict, context: str = "") -> List[Dict]:
 
 
 
 
761
  """
762
- Fallback generator that enforces the 5-5-10 distribution
 
 
 
 
763
  """
764
- items = []
765
- prof = state.get("profile", {}) or {}
766
- name = prof.get("full_name") or "Partner"
767
-
768
- # 1. Generate 5 PROFILE-BASED questions
769
- profile_sources = [
770
- ("education", f"How does your {profile_data['education']} background influence your approach to learning new things?"),
771
- ("employment", f"Given your work as {profile_data['employment']}, what problem-solving methods do you typically use?"),
772
- ("hobbies", f"When engaging in {profile_data['hobbies']}, how do you typically organize your activity?"),
773
- ("family_type", f"Growing up in a {profile_data['family_type']} family, what communication patterns feel most natural to you?"),
774
- ("current_city", f"Living in {profile_data['current_city']}, how do you adapt to your daily environment?")
775
  ]
776
-
777
- for source, question in profile_sources:
778
  opts = [
779
- {"text": "Analyze data and research thoroughly before deciding", "color": "blue"},
780
- {"text": "Create a structured plan and follow established procedures", "color": "green"},
781
- {"text": "Take immediate action and coordinate with people involved", "color": "red"},
782
- {"text": "Brainstorm creative approaches and explore possibilities", "color": "yellow"},
783
  ]
784
  random.shuffle(opts)
785
- items.append({
786
- "question": question,
787
- "options": opts,
788
- "source": "fallback_profile",
789
- "question_type": "profile"
790
- })
791
-
792
- # 2. Generate 5 EXPECTATION-BASED questions
793
- expectation_sources = [
794
- ("conflict_style", f"When facing disagreement ({expectation_data['conflict_style']}), how do you typically respond?"),
795
- ("financial_style", f"Regarding money matters ({expectation_data['financial_style']}), what's your immediate reaction to financial decisions?"),
796
- ("family_values", f"Considering your family values ({expectation_data['family_values']}), how do you approach family-related decisions?"),
797
- ("work_life", f"With your work-life preference ({expectation_data['lifestyle_pref']}), how do you manage daily priorities?"),
798
- ("deal_breakers", f"Given your deal breakers ({expectation_data['deal_breakers'][:50] if expectation_data['deal_breakers'] else 'certain boundaries'}), how do you establish personal limits?")
 
 
799
  ]
800
-
801
- for source, question in expectation_sources:
802
  opts = [
803
- {"text": "Gather all relevant information and analyze carefully", "color": "blue"},
804
- {"text": "Follow a systematic process to evaluate options", "color": "green"},
805
- {"text": "Make a quick decision and implement immediately", "color": "red"},
806
- {"text": "Consider innovative solutions and future possibilities", "color": "yellow"},
807
  ]
808
  random.shuffle(opts)
809
- items.append({
810
- "question": question,
811
- "options": opts,
812
- "source": "fallback_expectation",
813
- "question_type": "expectation"
814
- })
815
-
 
 
816
  # 3. Generate 10 CHARACTER-BASED questions from themes
817
- for i, theme in enumerate(themes[:10]): # Use first 10 themes
818
- short = theme.split(" around ")[-1].strip()
819
  question = f"When dealing with {short}, what is your typical approach?"
820
-
821
  opts = [
822
- {"text": "Research facts and analyze details before acting", "color": "blue"},
823
- {"text": "Develop a step-by-step plan and follow it", "color": "green"},
824
- {"text": "Take charge and coordinate people to solve it", "color": "red"},
825
- {"text": "Explore creative ideas and unconventional solutions", "color": "yellow"},
826
  ]
827
  random.shuffle(opts)
828
- items.append({
829
- "question": question,
830
- "options": opts,
831
- "source": "fallback_character",
832
- "question_type": "character"
833
- })
834
-
 
 
835
  # Ensure we have exactly 20 questions
836
  return items[:20]
837
 
838
 
839
  class SessionState:
840
- def __init__(self, n_questions: int, batch_size: int, domain: str = "general", role: Optional[str] = None, profile: Optional[Dict] = None):
 
 
 
 
 
 
 
841
  domain = (domain or role or "general").lower()
842
  self.domain = domain if domain in DOMAINS else "general"
843
- self.role = (role or self.domain)
844
  self.profile = profile or {}
845
  self.n_questions = max(1, min(n_questions, MAX_QUESTIONS))
846
  self.batch_size = max(1, batch_size)
@@ -850,11 +1087,14 @@ class SessionState:
850
  self.queue: List[Dict] = []
851
  self.finished = False
852
  self.used_topics: List[str] = []
853
- self.history_of_questions: List[str] = [] # Add this line to track question texts
 
854
 
855
  def to_min_state(self) -> Dict:
856
  total = sum(self.color_counts.values()) or 1
857
- mix_percentages = {k: round((v / total) * 100, 2) for k, v in self.color_counts.items()}
 
 
858
  dominant = max(self.color_counts, key=self.color_counts.get) if total else None
859
  return {
860
  "asked": self.asked,
@@ -868,10 +1108,12 @@ class SessionState:
868
  def remaining(self) -> int:
869
  return self.n_questions - self.asked
870
 
 
871
  SESSIONS_FILE = os.getenv("PYMATCH_SESSIONS_FILE", "sessions.json")
872
  _sessions_lock = threading.Lock()
873
  SESSIONS: Dict[str, SessionState] = {}
874
 
 
875
  def save_sessions():
876
  try:
877
  with _sessions_lock:
@@ -883,34 +1125,56 @@ def save_sessions():
883
  except Exception as e:
884
  print("Failed to save sessions:", e)
885
 
 
886
  def persist_final_progress(user_id: Optional[str], role: str, mix: Dict[str, float]) -> bool:
887
  from database import get_db_connection
888
  from config import PROGRESS_TBL
889
-
890
  llm_id = str(uuid.uuid4())
891
- blue = float(mix.get("blue", 0.0))
892
- green = float(mix.get("green", 0.0))
893
  yellow = float(mix.get("yellow", 0.0))
894
- red = float(mix.get("red", 0.0))
895
  try:
896
  conn = get_db_connection()
897
  cur = conn.cursor()
898
  # Try with llm_id; if identity error, retry without it
899
  try:
900
- cur.execute(f"""
 
901
  INSERT INTO [dbo].[{PROGRESS_TBL}]
902
  ([llm_id],[user_id],[role],[blue],[green],[yellow],[red],[created_at])
903
  VALUES (?,?,?,?,?,?,?,SYSUTCDATETIME())
904
- """, (llm_id, str(user_id) if user_id is not None else None, role, blue, green, yellow, red))
 
 
 
 
 
 
 
 
 
 
905
  conn.commit()
906
  return True
907
  except pyodbc.Error as e:
908
  if "IDENTITY_INSERT" in str(e) or "(544)" in str(e):
909
- cur.execute(f"""
 
910
  INSERT INTO [dbo].[{PROGRESS_TBL}]
911
  ([user_id],[role],[blue],[green],[yellow],[red],[created_at])
912
  VALUES (?,?,?,?,?,?,SYSUTCDATETIME())
913
- """, (str(user_id) if user_id is not None else None, role, blue, green, yellow, red))
 
 
 
 
 
 
 
 
 
914
  conn.commit()
915
  return True
916
  else:
@@ -920,29 +1184,38 @@ def persist_final_progress(user_id: Optional[str], role: str, mix: Dict[str, flo
920
  print("Persist final progress failed:", ex)
921
  return False
922
  finally:
923
- try: conn.close()
924
- except: pass
 
 
 
925
 
926
  def choose_themes(sess, k: int) -> List[str]:
927
- """
928
- Instead of generic topic banks, use FAISS to retrieve text chunks from the document.
929
- """
930
  try:
931
  from faiss_service import HAS_FAISS, FAISS_INDEX, TEXT_CHUNKS
932
-
933
  if HAS_FAISS and FAISS_INDEX is not None and TEXT_CHUNKS:
934
  # Just grab k random chunks from the indexed document
935
  selected = random.sample(TEXT_CHUNKS, min(k, len(TEXT_CHUNKS)))
936
- # Wrap them as "themes" but really they're just context
937
  return selected
938
  except ImportError:
939
  pass
940
-
941
  # fallback: use generic themes
942
  fallback_themes = [
943
- "communication style", "conflict resolution", "decision making",
944
- "problem solving", "team collaboration", "personal values",
945
- "work habits", "social interaction", "stress management",
946
- "goal setting", "time management", "relationship dynamics"
 
 
 
 
 
 
 
 
947
  ]
948
- return random.sample(fallback_themes, min(k, len(fallback_themes)))
 
 
1
  import pyodbc
2
  import os
3
  import json
 
21
  HAS_LLM_STACK = False
22
  HAS_LLM = False
23
 
24
+
25
  class Option(BaseModel):
26
  text: str
27
  color: str
28
 
29
+
30
  class QAItem(BaseModel):
31
  question: str
32
  options: List[Option] = Field(min_items=4, max_items=4)
33
 
34
+
35
  class BatchQA(BaseModel):
36
  items: List[QAItem] = Field(..., min_items=1)
37
+
38
+
39
  SYSTEM_PROMPT = (
40
  "You write marriage compatibility assessment questions that reveal four personality colors through forced choices:\n"
41
  "- blue=analytical, fact-based (positive: thorough, precise | negative: overly critical, data-obsessed)\n"
 
69
 
70
  USER_PROMPT_BATCH = (
71
  "Context (from Surrounded by Idiots or other corpus):\n{context}\n\n"
72
+ "Question Type: {question_type}\n\n"
 
73
  "User Profile (Current Background):\n"
74
  "- Education: {education}\n"
75
  "- Employment: {employment}\n"
 
92
  "Themes (array of short strings): {themes_json}\n"
93
  "Previously asked questions: {previous_questions}\n\n"
94
  "{format_instructions}\n\n"
 
95
  "Generate {question_type} questions:\n"
96
  "- If QUESTION TYPE is 'profile': Generate 5 questions using ONLY profile data (education, employment, hobbies, family background, current lifestyle)\n"
97
  "- If QUESTION TYPE is 'expectation': Generate 5 questions using ONLY expectation data (conflict style, financial preferences, values, deal breakers)\n"
98
  "- If QUESTION TYPE is 'character': Generate 10 questions about CURRENT behavior in various life situations\n\n"
 
99
  "CRITICAL RULES:\n"
100
  "1) DO NOT use prefixes like 'Based on your profile' or 'Considering your expectations'\n"
101
  "2) Questions should be natural and flow conversationally\n"
 
114
  if HAS_LLM_STACK and os.getenv("OPENAI_API_KEY"):
115
  try:
116
  PARSER_BATCH = PydanticOutputParser(pydantic_object=BatchQA)
117
+
118
  def build_batch_chain():
119
  llm = ChatOpenAI(
120
  model="gpt-4o-mini",
 
123
  timeout=30,
124
  model_kwargs={"response_format": {"type": "json_object"}},
125
  )
126
+ prompt = ChatPromptTemplate.from_messages(
127
+ [
128
+ ("system", SYSTEM_PROMPT),
129
+ ("user", USER_PROMPT_BATCH),
130
+ ]
131
+ )
132
  return prompt | llm | PARSER_BATCH
133
 
134
  CHAIN_BATCH = build_batch_chain()
 
136
  print("Failed to build CHAIN_BATCH:", e)
137
  CHAIN_BATCH = None
138
 
139
+
140
  def ensure_valid_colors(options: List[Dict]) -> List[Dict]:
141
  seen, fixed = set(), []
142
  defaults = {
143
+ "blue": "Check facts and numbers",
144
+ "green": "Make a step-by-step plan",
145
+ "red": "Get people together and act",
146
+ "yellow": "Think of a new idea",
147
  }
148
  for o in options:
149
  c = str(o.get("color", "")).lower()
150
  t = str(o.get("text", "")).strip()
151
  if c in COLOR_KEYS and c not in seen and t:
152
+ seen.add(c)
153
+ fixed.append({"text": t[:80], "color": c})
154
  for c in COLOR_KEYS:
155
  if c not in seen:
156
  fixed.append({"text": defaults[c], "color": c})
157
  return fixed[:4]
158
 
159
+
160
  def summarize_profile(profile: Dict) -> Dict:
161
  """Extract all non-PII columns from Marriage table for LLM context"""
162
  out: Dict = {}
163
+
164
  # All columns from Marriage table (excluding PII where possible)
165
  marriage_columns = [
166
+ "user_id",
167
+ "full_name",
168
+ "gender",
169
+ "current_city",
170
+ "marital_status",
171
+ "education_level",
172
+ "employment_status",
173
+ "number_of_siblings",
174
+ "family_type",
175
+ "hobbies_interests",
176
+ "conflict_approach",
177
+ "financial_style",
178
+ "income_range",
179
+ "relocation_willingness",
180
+ "height",
181
+ "skin_tone",
182
+ "languages_spoken",
183
+ "country",
184
+ "blood_group",
185
+ "religion",
186
+ "dual_citizenship",
187
+ "siblings_position",
188
+ "parents_living_status",
189
+ "live_with_parents",
190
+ "support_parents_financially",
191
+ "family_communication_frequency",
192
+ "food_preference",
193
+ "smoking_habit",
194
+ "alcohol_habit",
195
+ "daily_routine",
196
+ "fitness_level",
197
+ "own_pets",
198
+ "travel_preference",
199
+ "relaxation_mode",
200
+ "job_role",
201
+ "work_experience_years",
202
+ "career_aspirations",
203
+ "field_of_study",
204
+ "remark",
205
+ "children_timeline",
206
+ "open_to_adoption",
207
+ "deal_breakers",
208
+ "other_non_negotiables",
209
+ "health_constraints",
210
+ "live_with_inlaws",
211
  ]
212
+
213
  for col in marriage_columns:
214
  v = profile.get(col)
215
  if v not in (None, "", []):
216
  out[col] = v
217
+
218
  return out
219
 
220
+
221
  def offline_generate_batch(themes: List[str], state: Dict, context: str = "") -> List[Dict]:
222
  prof = state.get("profile", {}) or {}
223
  name = prof.get("full_name") or "Partner"
 
247
 
248
  # incorporate small bit from context if available (first 120 chars)
249
  if context:
250
+ ctx_snip = context.replace("\n", " ")[:120]
251
  q = f"{q} (Note: {ctx_snip})"
252
 
253
  # Keep concise
 
255
  q = " ".join(q.split()[:20])
256
 
257
  opts = [
258
+ {"text": "Check facts and numbers", "color": "blue"},
259
+ {"text": "Make a step-by-step plan", "color": "green"},
260
+ {"text": "Get people together and act", "color": "red"},
261
+ {"text": "Think of new ideas", "color": "yellow"},
262
  ]
263
  random.shuffle(opts)
264
  items.append({"question": q, "options": opts, "source": "fallback"})
265
  return items
266
 
267
 
268
+ def generate_category_specific_options(
269
+ question_type: str,
270
+ question_text: str,
271
+ profile_data: Dict = None,
272
+ expectation_data: Dict = None,
273
+ ) -> List[Dict]:
274
+ """Generate options tailored to the question category and content"""
275
  question_lower = question_text.lower()
276
+
277
  # Extract key themes from question for better contextualization
278
  themes_in_question = []
279
+ for theme in [
280
+ "learning",
281
+ "problem",
282
+ "conflict",
283
+ "money",
284
+ "family",
285
+ "work",
286
+ "social",
287
+ "stress",
288
+ "decision",
289
+ "plan",
290
+ ]:
291
  if theme in question_lower:
292
  themes_in_question.append(theme)
293
+
294
+ # Default option templates for each color (Simple English)
295
  base_options = {
296
  "blue": {
297
+ "profile": "Study all the facts and think carefully",
298
+ "expectation": "Get all the details before deciding",
299
+ "character": "Look at the facts and think it through",
300
  },
301
  "green": {
302
+ "profile": "Follow a clear, step-by-step way",
303
+ "expectation": "Set clear rules and follow them",
304
+ "character": "Make a plan and stick to it",
305
  },
306
  "red": {
307
+ "profile": "Do something right away to fix it",
308
+ "expectation": "Take charge and get things done fast",
309
+ "character": "Act now and get people to help",
310
  },
311
  "yellow": {
312
+ "profile": "Try new ways and think differently",
313
+ "expectation": "Think of new ideas and what could be",
314
+ "character": "Think of creative ideas and new ways",
315
+ },
316
  }
317
+
318
+ # Contextual variations based on question themes (Simple English)
319
  contextual_variations = {
320
  "learning": {
321
+ "blue": "Study in a careful, organized way",
322
+ "green": "Follow the lessons step by step",
323
+ "red": "Start doing it right away to learn",
324
+ "yellow": "Try different ways to learn",
325
  },
326
  "problem": {
327
+ "blue": "Look at all the facts to find why",
328
+ "green": "Fix each part one by one",
329
+ "red": "Do something now to fix it",
330
+ "yellow": "Find a new way around it",
331
  },
332
  "conflict": {
333
+ "blue": "Think about each side fairly",
334
+ "green": "Find a fair way to solve it",
335
+ "red": "Face it directly and fix it",
336
+ "yellow": "Find a new way to agree",
337
  },
338
  "money": {
339
+ "blue": "Look at all the money details",
340
+ "green": "Plan spending and track costs",
341
+ "red": "Make quick money choices",
342
+ "yellow": "Think of new ways to earn",
343
  },
344
  "family": {
345
+ "blue": "Think about family matters clearly",
346
+ "green": "Keep family ways and routines",
347
+ "red": "Take the lead in family things",
348
+ "yellow": "Try new family activities",
349
+ },
350
  }
351
+
352
  # Start with base options for the category
353
  options = []
354
  for color in COLOR_KEYS:
355
  base_text = base_options[color][question_type]
356
+
357
  # Add contextual variation if theme matches
358
  for theme, variations in contextual_variations.items():
359
  if theme in themes_in_question:
360
  base_text = variations[color]
361
  break
362
+
363
+ # Add shadow/negative aspects for realism (Simple English)
364
  shadow_aspects = {
365
  "blue": {
366
+ "profile": " (but can overthink things)",
367
+ "expectation": " (but can think too much)",
368
+ "character": " (but can be too picky)",
369
  },
370
  "green": {
371
+ "profile": " (but can be too strict)",
372
+ "expectation": " (but can make too many rules)",
373
+ "character": " (but can hate change)",
374
  },
375
  "red": {
376
+ "profile": " (but can act too fast)",
377
+ "expectation": " (but can be too bossy)",
378
+ "character": " (but can miss details)",
379
  },
380
  "yellow": {
381
+ "profile": " (but can dream too much)",
382
+ "expectation": " (but can forget to finish)",
383
+ "character": " (but can be all over the place)",
384
+ },
385
  }
386
+
387
  # Only add shadow aspects occasionally (30% chance) for variety
388
  if random.random() < 0.3:
389
  shadow = shadow_aspects[color][question_type]
390
+ # Ensure we do not exceed word limit
391
  if len(base_text.split()) + len(shadow.split()) <= 15:
392
  base_text += shadow
 
 
 
 
 
 
 
393
 
394
+ options.append(
395
+ {
396
+ "text": base_text[:80], # Limit length
397
+ "color": color,
398
+ }
399
+ )
400
+
401
+ return options
402
 
403
 
404
+ def get_book_based_options(
405
+ question_type: str, question_text: str
406
+ ) -> List[Dict]:
407
+ """
408
+ Return four options (one per color) for book-based mode.
409
 
410
+ For now this simply delegates to generate_category_specific_options so
411
+ that the function always exists and never raises NameError.
412
+ Later you can replace this logic to actually use Surrounded-by-Idiots
413
+ COLOR_EXAMPLES from faiss_service if you want.
414
+ """
415
+ return generate_category_specific_options(question_type, question_text, None, None)
416
 
417
 
418
+ def generate_batch_questions(
419
+ themes: List[str],
420
+ state: Dict,
421
+ context: str = "",
422
+ previous_questions: List[str] = None,
423
+ ) -> List[Dict]:
424
  # Extract ALL data from Marriage table
425
  profile = state.get("profile", {})
426
  user_id = profile.get("user_id")
427
+
428
  try:
429
  from database import fetch_expectation_data
430
+
431
  expectation_data = fetch_expectation_data(user_id) if user_id else {}
432
  except ImportError:
433
  expectation_data = {}
434
+
435
  # Extract ALL profile data from Marriage table
436
  profile_data = {
437
  # Personal Information
438
  "full_name": profile.get("full_name", "Not specified"),
 
439
  "gender": profile.get("gender", "Not specified"),
440
  "current_city": profile.get("current_city", "Not specified"),
441
  "country": profile.get("country", "Not specified"),
442
  "marital_status": profile.get("marital_status", "Not specified"),
 
443
  # Education & Career
444
  "education_level": profile.get("education_level", "Not specified"),
445
  "employment_status": profile.get("employment_status", "Not specified"),
 
448
  "career_aspirations": profile.get("career_aspirations", "Not specified"),
449
  "field_of_study": profile.get("field_of_study", "Not specified"),
450
  "income_range": profile.get("income_range", "Not specified"),
 
451
  # Family & Background
452
  "number_of_siblings": profile.get("number_of_siblings", "Not specified"),
453
  "family_type": profile.get("family_type", "Not specified"),
454
  "siblings_position": profile.get("siblings_position", "Not specified"),
455
  "parents_living_status": profile.get("parents_living_status", "Not specified"),
456
  "live_with_parents": profile.get("live_with_parents", "Not specified"),
457
+ "support_parents_financially": profile.get(
458
+ "support_parents_financially", "Not specified"
459
+ ),
460
+ "family_communication_frequency": profile.get(
461
+ "family_communication_frequency", "Not specified"
462
+ ),
463
  # Physical & Health
464
  "height": profile.get("height", "Not specified"),
465
  "skin_tone": profile.get("skin_tone", "Not specified"),
466
  "blood_group": profile.get("blood_group", "Not specified"),
467
  "health_constraints": profile.get("health_constraints", "Not specified"),
468
  "fitness_level": profile.get("fitness_level", "Not specified"),
 
469
  # Lifestyle & Habits
470
  "hobbies_interests": str(profile.get("hobbies_interests", "Not specified")),
471
  "conflict_approach": profile.get("conflict_approach", "Not specified"),
 
477
  "own_pets": profile.get("own_pets", "Not specified"),
478
  "travel_preference": profile.get("travel_preference", "Not specified"),
479
  "relaxation_mode": profile.get("relaxation_mode", "Not specified"),
 
480
  # Languages & Relocation
481
  "languages_spoken": profile.get("languages_spoken", "Not specified"),
482
  "relocation_willingness": profile.get("relocation_willingness", "Not specified"),
 
483
  # Religion & Citizenship
484
  "religion": profile.get("religion", "Not specified"),
485
  "dual_citizenship": profile.get("dual_citizenship", "Not specified"),
 
486
  # Relationship Preferences
487
  "children_timeline": profile.get("children_timeline", "Not specified"),
488
  "open_to_adoption": profile.get("open_to_adoption", "Not specified"),
489
  "deal_breakers": profile.get("deal_breakers", "Not specified"),
490
  "other_non_negotiables": profile.get("other_non_negotiables", "Not specified"),
491
  "live_with_inlaws": profile.get("live_with_inlaws", "Not specified"),
 
492
  # Additional Info
493
  "remark": profile.get("remark", "Not specified"),
494
  }
495
+
496
  # Extract ALL expectation data from ExpectationResponse table
497
  expectation_data_dict = {
498
  # Basic Preferences
 
501
  "pref_current_city": expectation_data.get("pref_current_city", "Not specified"),
502
  "pref_countries": expectation_data.get("pref_countries", "Not specified"),
503
  "pref_languages": expectation_data.get("pref_languages", "Not specified"),
504
+ "pref_education_level": expectation_data.get(
505
+ "pref_education_level", "Not specified"
506
+ ),
507
+ "pref_employment_status": expectation_data.get(
508
+ "pref_employment_status", "Not specified"
509
+ ),
510
  # Health & Lifestyle
511
  "health_constraints": expectation_data.get("health_constraints", "Not specified"),
512
  "pref_diet": expectation_data.get("pref_diet", "Not specified"),
513
  "accept_smoking": expectation_data.get("accept_smoking", "Not specified"),
514
  "accept_alcohol": expectation_data.get("accept_alcohol", "Not specified"),
515
  "pref_fitness": expectation_data.get("pref_fitness", "Not specified"),
 
516
  # Family & Living
517
  "pref_family_type": expectation_data.get("pref_family_type", "Not specified"),
518
  "live_with_inlaws": expectation_data.get("live_with_inlaws", "Not specified"),
519
  "children_timeline": expectation_data.get("children_timeline", "Not specified"),
520
  "open_to_adoption": expectation_data.get("open_to_adoption", "Not specified"),
521
+ "pref_live_with_parents": expectation_data.get(
522
+ "pref_live_with_parents", "Not specified"
523
+ ),
524
+ "financial_support_to_parents": expectation_data.get(
525
+ "financial_support_to_parents", "Not specified"
526
+ ),
527
  # Conflict & Finance
528
+ "pref_conflict_approach": expectation_data.get(
529
+ "pref_conflict_approach", "Not specified"
530
+ ),
531
+ "pref_financial_style": expectation_data.get(
532
+ "pref_financial_style", "Not specified"
533
+ ),
534
  "pref_income_range": expectation_data.get("pref_income_range", "Not specified"),
 
535
  # Values & Compatibility
536
+ "religion_alignment": expectation_data.get(
537
+ "religion_alignment", "Not specified"
538
+ ),
539
+ "pref_shared_hobbies": expectation_data.get(
540
+ "pref_shared_hobbies", "Not specified"
541
+ ),
542
  "travel_pref": expectation_data.get("travel_pref", "Not specified"),
543
  "pet_pref": expectation_data.get("pet_pref", "Not specified"),
 
544
  # Career & Relocation
545
+ "pref_partner_relocation": expectation_data.get(
546
+ "pref_partner_relocation", "Not specified"
547
+ ),
548
+ "pref_career_aspirations": expectation_data.get(
549
+ "pref_career_aspirations", "Not specified"
550
+ ),
551
  # Additional Preferences
552
  "marital_status": expectation_data.get("marital_status", "Not specified"),
553
  "skin_tone": expectation_data.get("skin_tone", "Not specified"),
554
  "daily_routine": expectation_data.get("daily_routine", "Not specified"),
555
+ "family_communication_frequency": expectation_data.get(
556
+ "family_communication_frequency", "Not specified"
557
+ ),
558
  "relaxation_mode": expectation_data.get("relaxation_mode", "Not specified"),
 
559
  # Non-negotiables
560
  "deal_breakers": expectation_data.get("deal_breakers", "Not specified"),
561
+ "other_non_negotiables": expectation_data.get(
562
+ "other_non_negotiables", "Not specified"
563
+ ),
564
  # Summary
565
  "expectation_summary": expectation_data.get("expectation_summary", "Not specified"),
566
  "_mandatory_fields": expectation_data.get("_mandatory_fields", "Not specified"),
567
  }
568
+
569
+ # Check if we have book-based options
570
+ try:
571
+ from faiss_service import COLOR_EXAMPLES
572
+
573
+ use_book_options = COLOR_EXAMPLES is not None
574
+ if use_book_options:
575
+ print("Using book-based options from 'Surrounded by Idiots'")
576
+ except ImportError:
577
+ use_book_options = False
578
+
579
  if CHAIN_BATCH is not None and PARSER_BATCH is not None:
580
  try:
581
  items: List[Dict] = []
582
+
583
  # 1. PROFILE-BASED QUESTIONS (5 questions) - Using ALL Marriage table columns
584
  profile_prompt = {
585
  "state": json.dumps(state, ensure_ascii=False),
586
  "themes_json": json.dumps(["profile"] * 5, ensure_ascii=False),
587
+ "previous_questions": json.dumps(
588
+ previous_questions or [], ensure_ascii=False
589
+ ),
590
  "format_instructions": PARSER_BATCH.get_format_instructions(),
591
  "context": "Generate 5 PROFILE-BASED questions using ALL available user background information.",
592
  "question_type": "profile",
 
593
  # Use ALL profile data
594
  "education": f"{profile_data['education_level']} | {profile_data['field_of_study']}",
595
  "employment": f"{profile_data['employment_status']} | {profile_data['job_role']} ({profile_data['work_experience_years']} years)",
596
+ "hobbies": profile_data["hobbies_interests"],
597
  "family_type": f"{profile_data['family_type']} | Siblings: {profile_data['number_of_siblings']} | Position: {profile_data['siblings_position']}",
598
+ "current_lifestyle": (
599
+ "City: "
600
+ f"{profile_data['current_city']}, {profile_data['country']} | "
601
+ f"Height: {profile_data['height']} | Languages: {profile_data['languages_spoken']} | "
602
+ f"Religion: {profile_data['religion']} | Fitness: {profile_data['fitness_level']} | "
603
+ f"Diet: {profile_data['food_preference']} | Habits: Smoking: {profile_data['smoking_habit']}, "
604
+ f"Alcohol: {profile_data['alcohol_habit']}"
605
+ ),
606
  # Expectation data marked as irrelevant
607
  "conflict_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
608
  "financial_style": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
 
615
  "ambition_pref": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
616
  "deal_breakers": "IRRELEVANT_FOR_PROFILE_QUESTIONS",
617
  }
618
+
619
  result = CHAIN_BATCH.invoke(profile_prompt)
620
  profile_items = get_items_from_result(result)
621
+
622
  for qa in profile_items[:5]:
623
  out = qa.dict() if hasattr(qa, "dict") else dict(qa)
624
+
625
+ # Get options from book if available
626
+ if use_book_options:
627
+ options = get_book_based_options("profile", out.get("question", ""))
628
+ if options and len(options) == 4:
629
+ out["options"] = options
630
+ else:
631
+ # Fallback to generated options
632
+ out["options"] = generate_category_specific_options(
633
+ "profile", out.get("question", ""), profile_data, None
634
+ )
635
+ else:
636
+ out["options"] = generate_category_specific_options(
637
+ "profile", out.get("question", ""), profile_data, None
638
+ )
639
+
640
+ out["source"] = (
641
+ "llm_profile_book" if use_book_options else "llm_profile"
642
  )
 
643
  out["question_type"] = "profile"
644
  random.shuffle(out["options"])
645
  items.append(out)
646
+
647
  # 2. EXPECTATION-BASED QUESTIONS (5 questions) - Using ALL ExpectationResponse columns
648
  expectation_prompt = {
649
  "state": json.dumps(state, ensure_ascii=False),
650
  "themes_json": json.dumps(["expectation"] * 5, ensure_ascii=False),
651
+ "previous_questions": json.dumps(
652
+ [q["question"] for q in items] + (previous_questions or []),
653
+ ensure_ascii=False,
654
+ ),
655
  "format_instructions": PARSER_BATCH.get_format_instructions(),
656
  "context": "Generate 5 EXPECTATION-BASED questions using ALL relationship preferences and expectations.",
657
  "question_type": "expectation",
 
658
  # Minimal profile context
659
  "education": "Background context only",
660
  "employment": "Background context only",
661
  "hobbies": "Background context only",
662
  "family_type": "Background context only",
663
  "current_lifestyle": "General context",
 
664
  # Use ALL expectation data
665
  "conflict_style": f"{expectation_data_dict['pref_conflict_approach']}",
666
+ "financial_style": (
667
+ f"{expectation_data_dict['pref_financial_style']} | "
668
+ f"Income: {expectation_data_dict['pref_income_range']}"
669
+ ),
670
+ "income_range": expectation_data_dict["pref_income_range"],
671
+ "relocation_willingness": (
672
+ f"{expectation_data_dict['pref_partner_relocation']}"
673
+ ),
674
+ "family_values": (
675
+ f"{expectation_data_dict['pref_family_type']} | "
676
+ f"Live with in-laws: {expectation_data_dict['live_with_inlaws']} | "
677
+ f"Children timeline: {expectation_data_dict['children_timeline']}"
678
+ ),
679
+ "core_values": (
680
+ f"Religion: {expectation_data_dict['religion_alignment']} | "
681
+ f"Deal breakers: {expectation_data_dict['deal_breakers']}"
682
+ ),
683
+ "lifestyle_pref": (
684
+ f"Fitness: {expectation_data_dict['pref_fitness']} | "
685
+ f"Diet: {expectation_data_dict['pref_diet']} | "
686
+ f"Daily routine: {expectation_data_dict['daily_routine']}"
687
+ ),
688
+ "social_pref": (
689
+ f"Hobbies: {expectation_data_dict['pref_shared_hobbies']} | "
690
+ f"Travel: {expectation_data_dict['travel_pref']} | "
691
+ f"Pets: {expectation_data_dict['pet_pref']}"
692
+ ),
693
+ "ambition_pref": (
694
+ f"Career: {expectation_data_dict['pref_career_aspirations']} | "
695
+ f"Education: {expectation_data_dict['pref_education_level']}"
696
+ ),
697
+ "deal_breakers": (
698
+ f"{expectation_data_dict['deal_breakers']} | "
699
+ f"Other non-negotiables: {expectation_data_dict['other_non_negotiables']}"
700
+ ),
701
  }
702
+
703
  result = CHAIN_BATCH.invoke(expectation_prompt)
704
  expectation_items = get_items_from_result(result)
705
+
706
  for qa in expectation_items[:5]:
707
  out = qa.dict() if hasattr(qa, "dict") else dict(qa)
708
+
709
+ # Get options from book if available
710
+ if use_book_options:
711
+ options = get_book_based_options("expectation", out.get("question", ""))
712
+ if options and len(options) == 4:
713
+ out["options"] = options
714
+ else:
715
+ out["options"] = generate_category_specific_options(
716
+ "expectation",
717
+ out.get("question", ""),
718
+ None,
719
+ expectation_data_dict,
720
+ )
721
+ else:
722
+ out["options"] = generate_category_specific_options(
723
+ "expectation",
724
+ out.get("question", ""),
725
+ None,
726
+ expectation_data_dict,
727
+ )
728
+
729
+ out["source"] = (
730
+ "llm_expectation_book" if use_book_options else "llm_expectation"
731
  )
 
732
  out["question_type"] = "expectation"
733
  random.shuffle(out["options"])
734
  items.append(out)
735
+
736
  # 3. CHARACTER-BASED QUESTIONS (10 questions) - Using data from BOTH tables
737
  character_prompt = {
738
  "state": json.dumps(state, ensure_ascii=False),
739
  "themes_json": json.dumps(themes[:10], ensure_ascii=False),
740
+ "previous_questions": json.dumps(
741
+ [q["question"] for q in items] + (previous_questions or []),
742
+ ensure_ascii=False,
743
+ ),
744
  "format_instructions": PARSER_BATCH.get_format_instructions(),
745
+ "context": context[:2000]
746
+ + "\n\nGenerate 10 CHARACTER-BASED questions using ALL available data.",
747
  "question_type": "character",
 
748
  # All data from Marriage table
749
+ "education": profile_data["education_level"],
750
+ "employment": profile_data["employment_status"],
751
+ "hobbies": profile_data["hobbies_interests"],
752
+ "family_type": profile_data["family_type"],
753
+ "current_lifestyle": (
754
+ f"{profile_data['current_city']}, {profile_data['country']} | "
755
+ f"{profile_data['daily_routine']} | Relaxation: {profile_data['relaxation_mode']}"
756
+ ),
757
  # All data from ExpectationResponse table
758
+ "conflict_style": expectation_data_dict["pref_conflict_approach"],
759
+ "financial_style": expectation_data_dict["pref_financial_style"],
760
+ "income_range": expectation_data_dict["pref_income_range"],
761
+ "relocation_willingness": expectation_data_dict[
762
+ "pref_partner_relocation"
763
+ ],
764
+ "family_values": expectation_data_dict["pref_family_type"],
765
+ "core_values": expectation_data_dict["religion_alignment"],
766
+ "lifestyle_pref": expectation_data_dict["pref_fitness"],
767
+ "social_pref": expectation_data_dict["pref_shared_hobbies"],
768
+ "ambition_pref": expectation_data_dict["pref_career_aspirations"],
769
+ "deal_breakers": expectation_data_dict["deal_breakers"],
770
  }
771
+
772
  result = CHAIN_BATCH.invoke(character_prompt)
773
  character_items = get_items_from_result(result)
774
+
775
  for qa in character_items[:10]:
776
  out = qa.dict() if hasattr(qa, "dict") else dict(qa)
777
+
778
+ # Get options from book if available
779
+ if use_book_options:
780
+ options = get_book_based_options("character", out.get("question", ""))
781
+ if options and len(options) == 4:
782
+ out["options"] = options
783
+ else:
784
+ out["options"] = generate_category_specific_options(
785
+ "character",
786
+ out.get("question", ""),
787
+ profile_data,
788
+ expectation_data_dict,
789
+ )
790
+ else:
791
+ out["options"] = generate_category_specific_options(
792
+ "character",
793
+ out.get("question", ""),
794
+ profile_data,
795
+ expectation_data_dict,
796
+ )
797
+
798
+ out["source"] = (
799
+ "llm_character_book" if use_book_options else "llm_character"
800
  )
 
801
  out["question_type"] = "character"
802
  random.shuffle(out["options"])
803
  items.append(out)
804
+
805
  # Verify we have exactly 20 questions
806
  if len(items) == 20:
807
  return items[:20]
808
  else:
809
+ # If LLM did not generate enough, fill with fallback
810
+ return fill_missing_questions(
811
+ items, themes, state, profile_data, expectation_data_dict, context
812
+ )
813
+
814
  except Exception as e:
815
  print("LLM batch generation failed:", e)
816
+ return generate_fallback_with_distribution(
817
+ themes, state, profile_data, expectation_data_dict, context
818
+ )
819
  else:
820
+ return generate_fallback_with_distribution(
821
+ themes, state, profile_data, expectation_data_dict, context
822
+ )
823
 
824
 
825
  def get_items_from_result(result):
 
831
  else:
832
  return []
833
 
834
+
835
+ def fill_missing_questions(
836
+ current_items: List[Dict],
837
+ themes: List[str],
838
+ state: Dict,
839
+ profile_data: Dict,
840
+ expectation_data: Dict,
841
+ context: str = "",
842
+ ) -> List[Dict]:
843
  """Fill missing questions to reach 20 total"""
844
  items = current_items.copy()
845
+
846
  # Count current distribution
847
  profile_count = sum(1 for q in items if q.get("question_type") == "profile")
848
  expectation_count = sum(1 for q in items if q.get("question_type") == "expectation")
849
  character_count = sum(1 for q in items if q.get("question_type") == "character")
850
+
851
  # Fill profile questions if needed
852
  while profile_count < 5:
853
  profile_q = generate_profile_question(state, profile_data)
854
  items.append(profile_q)
855
  profile_count += 1
856
+
857
  # Fill expectation questions if needed
858
  while expectation_count < 5:
859
  expectation_q = generate_expectation_question(state, expectation_data)
860
  items.append(expectation_q)
861
  expectation_count += 1
862
+
863
  # Fill character questions if needed
864
  while character_count < 10:
865
  theme = themes[character_count % len(themes)] if themes else "daily situation"
866
  character_q = generate_character_question(theme, state)
867
  items.append(character_q)
868
  character_count += 1
869
+
870
  return items[:20]
871
 
872
+
873
  def generate_profile_question(state: Dict, profile_data: Dict) -> Dict:
874
  """Generate a single profile question"""
875
  prof = state.get("profile", {})
876
  name = prof.get("full_name") or "Partner"
877
+
878
  profile_topics = [
879
+ (
880
+ "education",
881
+ "How does your education background shape how you approach complex information?",
882
+ ),
883
+ (
884
+ "employment",
885
+ "What methods from your work life do you use for personal challenges?",
886
+ ),
887
+ (
888
+ "hobbies",
889
+ "How do your hobbies change the way you try new experiences?",
890
+ ),
891
+ (
892
+ "family",
893
+ "What communication style from your family feels most natural to you?",
894
+ ),
895
+ (
896
+ "background",
897
+ "How does your personal history affect your current decision-making style?",
898
+ ),
899
  ]
900
+
901
+ topic_idx = len(
902
+ [q for q in state.get("history", []) if q.get("question_type") == "profile"]
903
+ )
904
  if topic_idx >= len(profile_topics):
905
  topic_idx = 0
906
+
907
  topic, question = profile_topics[topic_idx]
908
+
909
  # Generate contextual options
910
  options = generate_category_specific_options("profile", question, profile_data, None)
911
  random.shuffle(options)
912
+
913
  return {
914
  "question": question,
915
  "options": options,
916
  "source": "fallback_profile",
917
+ "question_type": "profile",
918
  }
919
 
920
+
921
def generate_expectation_question(state: Dict, expectation_data: Dict) -> Dict:
    """Generate a single expectation-based fallback question.

    The topic rotates based on how many expectation questions already appear
    in the session history; once every topic has been used, it resets to the
    first topic.

    Args:
        state: Session state dict; only ``state["history"]`` is read here.
        expectation_data: Raw expectation fields, forwarded to the option
            generator.

    Returns:
        Dict with ``question``, shuffled ``options``, ``source`` and
        ``question_type`` keys.
    """
    expectation_topics = [
        (
            "conflict",
            "When tensions arise, what is your most natural way to handle them?",
        ),
        ("values", "How do your core principles guide your everyday choices?"),
        ("finance", "What mindset guides your shared financial decisions?"),
        ("balance", "How do you balance your needs with your partner's needs?"),
        ("dealbreakers", "What personal boundaries feel completely non-negotiable to you?"),
    ]

    # Count expectation questions already asked to pick the next topic in order.
    topic_idx = len(
        [q for q in state.get("history", []) if q.get("question_type") == "expectation"]
    )
    if topic_idx >= len(expectation_topics):
        # NOTE(review): overflow pins to topic 0 instead of cycling — kept as-is.
        topic_idx = 0

    # Only the question text is needed; the topic label is informational.
    question = expectation_topics[topic_idx][1]

    # Generate contextual options; shuffle so colors don't appear in a fixed order.
    options = generate_category_specific_options(
        "expectation", question, None, expectation_data
    )
    random.shuffle(options)

    return {
        "question": question,
        "options": options,
        "source": "fallback_expectation",
        "question_type": "expectation",
    }
954
 
955
 
 
957
  """Generate a single character question"""
958
  prof = state.get("profile", {})
959
  name = prof.get("full_name") or "Partner"
960
+
961
  short_theme = theme.split(" around ")[-1].strip()[:50]
962
+ question = f"When {short_theme}, what is your typical response?"
963
+
964
  # Generate contextual options
965
  options = generate_category_specific_options("character", question, None, None)
966
  random.shuffle(options)
967
+
968
  return {
969
  "question": question,
970
  "options": options,
971
  "source": "fallback_character",
972
+ "question_type": "character",
973
  }
974
 
975
 
976
def generate_fallback_with_distribution(
    themes: List[str],
    state: Dict,
    profile_data: Dict,
    expectation_data: Dict,
    context: str = "",
) -> List[Dict]:
    """
    Fallback generator that enforces the 5-5-10 distribution
    (5 profile + 5 expectation + 10 character questions = 20 total).

    This version does NOT access missing keys like 'education' directly.
    It uses generic wording so it never raises KeyError even if
    profile_data or expectation_data is empty or partial.

    Args:
        themes: Candidate character-question themes; padded with a default
            theme when fewer than 10 are supplied.
        state, profile_data, expectation_data, context: Accepted for interface
            compatibility with the LLM-backed generator; unused here.

    Returns:
        Exactly 20 question dicts, each with ``question``, shuffled
        ``options``, ``source`` and ``question_type`` keys.
    """
    items: List[Dict] = []

    # 1. Five PROFILE-BASED questions (generic but aligned with concept).
    profile_questions = [
        "How does your education background influence the way you learn new things?",
        "How does your main work or job affect how you solve problems?",
        "How do your hobbies or interests change the way you try new things?",
        "How does your family background shape your way of talking and listening?",
        "How does your living place and daily life affect your choices?",
    ]

    for q in profile_questions:
        opts = [
            {"text": "Look at facts and think before deciding", "color": "blue"},
            {"text": "Make a clear plan and follow it", "color": "green"},
            {"text": "Act now and involve people", "color": "red"},
            {"text": "Think of new and different ways", "color": "yellow"},
        ]
        random.shuffle(opts)
        items.append(
            {
                "question": q,
                "options": opts,
                "source": "fallback_profile",
                "question_type": "profile",
            }
        )

    # 2. Five EXPECTATION-BASED questions (generic expectation focus).
    expectation_questions = [
        "When there is conflict in a relationship, what do you usually do first?",
        "When you and your partner handle money, what feels most natural to you?",
        "When your values and your partner's values differ, how do you respond?",
        "How do you balance work, rest, and time with your partner?",
        "When a personal boundary is close to being crossed, what do you usually do?",
    ]

    for q in expectation_questions:
        opts = [
            {"text": "Check all facts before reacting", "color": "blue"},
            {"text": "Use a calm, step-by-step way", "color": "green"},
            {"text": "Decide fast and take action", "color": "red"},
            {"text": "Look for a new creative solution", "color": "yellow"},
        ]
        random.shuffle(opts)
        items.append(
            {
                "question": q,
                "options": opts,
                "source": "fallback_expectation",
                "question_type": "expectation",
            }
        )

    # 3. Ten CHARACTER-BASED questions from themes.
    # Fix: pad the pool up to 10 so the 5-5-10 contract holds even with
    # 1-9 supplied themes (the previous `themes[:10] or [...] * 10` only
    # padded when `themes` was completely empty).
    theme_pool = list(themes[:10])
    theme_pool += ["daily situation"] * (10 - len(theme_pool))

    for theme in theme_pool:
        short = theme.split(" around ")[-1].strip() or "a daily situation"
        question = f"When dealing with {short}, what is your typical approach?"

        opts = [
            {"text": "Look at facts and details first", "color": "blue"},
            {"text": "Make a step-by-step plan", "color": "green"},
            {"text": "Take charge and move things forward", "color": "red"},
            {"text": "Think of new and different ideas", "color": "yellow"},
        ]
        random.shuffle(opts)
        items.append(
            {
                "question": question,
                "options": opts,
                "source": "fallback_character",
                "question_type": "character",
            }
        )

    # Ensure we return exactly 20 questions.
    return items[:20]
1067
 
1068
 
1069
  class SessionState:
1070
+ def __init__(
1071
+ self,
1072
+ n_questions: int,
1073
+ batch_size: int,
1074
+ domain: str = "general",
1075
+ role: Optional[str] = None,
1076
+ profile: Optional[Dict] = None,
1077
+ ):
1078
  domain = (domain or role or "general").lower()
1079
  self.domain = domain if domain in DOMAINS else "general"
1080
+ self.role = role or self.domain
1081
  self.profile = profile or {}
1082
  self.n_questions = max(1, min(n_questions, MAX_QUESTIONS))
1083
  self.batch_size = max(1, batch_size)
 
1087
  self.queue: List[Dict] = []
1088
  self.finished = False
1089
  self.used_topics: List[str] = []
1090
+ # Track question texts, used by LLM prompt
1091
+ self.history_of_questions: List[str] = []
1092
 
1093
  def to_min_state(self) -> Dict:
1094
  total = sum(self.color_counts.values()) or 1
1095
+ mix_percentages = {
1096
+ k: round((v / total) * 100, 2) for k, v in self.color_counts.items()
1097
+ }
1098
  dominant = max(self.color_counts, key=self.color_counts.get) if total else None
1099
  return {
1100
  "asked": self.asked,
 
1108
  def remaining(self) -> int:
1109
  return self.n_questions - self.asked
1110
 
1111
+
1112
# Path of the JSON file that save_sessions() writes session data to;
# overridable via the PYMATCH_SESSIONS_FILE environment variable.
SESSIONS_FILE = os.getenv("PYMATCH_SESSIONS_FILE", "sessions.json")
# Guards access to the sessions file (held by save_sessions while writing).
_sessions_lock = threading.Lock()
# In-memory registry of active question sessions; presumably keyed by a
# session id string — TODO confirm against the session-creation route.
SESSIONS: Dict[str, SessionState] = {}
1115
 
1116
+
1117
  def save_sessions():
1118
  try:
1119
  with _sessions_lock:
 
1125
  except Exception as e:
1126
  print("Failed to save sessions:", e)
1127
 
1128
+
1129
  def persist_final_progress(user_id: Optional[str], role: str, mix: Dict[str, float]) -> bool:
1130
  from database import get_db_connection
1131
  from config import PROGRESS_TBL
1132
+
1133
  llm_id = str(uuid.uuid4())
1134
+ blue = float(mix.get("blue", 0.0))
1135
+ green = float(mix.get("green", 0.0))
1136
  yellow = float(mix.get("yellow", 0.0))
1137
+ red = float(mix.get("red", 0.0))
1138
  try:
1139
  conn = get_db_connection()
1140
  cur = conn.cursor()
1141
  # Try with llm_id; if identity error, retry without it
1142
  try:
1143
+ cur.execute(
1144
+ f"""
1145
  INSERT INTO [dbo].[{PROGRESS_TBL}]
1146
  ([llm_id],[user_id],[role],[blue],[green],[yellow],[red],[created_at])
1147
  VALUES (?,?,?,?,?,?,?,SYSUTCDATETIME())
1148
+ """,
1149
+ (
1150
+ llm_id,
1151
+ str(user_id) if user_id is not None else None,
1152
+ role,
1153
+ blue,
1154
+ green,
1155
+ yellow,
1156
+ red,
1157
+ ),
1158
+ )
1159
  conn.commit()
1160
  return True
1161
  except pyodbc.Error as e:
1162
  if "IDENTITY_INSERT" in str(e) or "(544)" in str(e):
1163
+ cur.execute(
1164
+ f"""
1165
  INSERT INTO [dbo].[{PROGRESS_TBL}]
1166
  ([user_id],[role],[blue],[green],[yellow],[red],[created_at])
1167
  VALUES (?,?,?,?,?,?,SYSUTCDATETIME())
1168
+ """,
1169
+ (
1170
+ str(user_id) if user_id is not None else None,
1171
+ role,
1172
+ blue,
1173
+ green,
1174
+ yellow,
1175
+ red,
1176
+ ),
1177
+ )
1178
  conn.commit()
1179
  return True
1180
  else:
 
1184
  print("Persist final progress failed:", ex)
1185
  return False
1186
  finally:
1187
+ try:
1188
+ conn.close()
1189
+ except Exception:
1190
+ pass
1191
+
1192
 
1193
def choose_themes(sess, k: int) -> List[str]:
    """Pick k themes: FAISS-indexed text chunks when available, else a generic list."""
    # Resolve the optional FAISS backing lazily; when the module is absent
    # we simply behave as if no index were loaded.
    try:
        from faiss_service import HAS_FAISS, FAISS_INDEX, TEXT_CHUNKS
    except ImportError:
        HAS_FAISS, FAISS_INDEX, TEXT_CHUNKS = False, None, []

    if HAS_FAISS and FAISS_INDEX is not None and TEXT_CHUNKS:
        # Random chunks from the indexed document serve as "themes" —
        # they are really just raw context text.
        return random.sample(TEXT_CHUNKS, min(k, len(TEXT_CHUNKS)))

    # No index available: fall back to a fixed pool of generic themes.
    generic_pool = [
        "communication style",
        "conflict resolution",
        "decision making",
        "problem solving",
        "team collaboration",
        "personal values",
        "work habits",
        "social interaction",
        "stress management",
        "goal setting",
        "time management",
        "relationship dynamics",
    ]
    return random.sample(generic_pool, min(k, len(generic_pool)))
routes/auth_routes.py CHANGED
@@ -60,7 +60,7 @@ def get_db_connection():
60
 
61
  # Read settings from environment variables
62
  SQL_DRIVER = os.getenv("PYMATCH_SQL_DRIVER", "ODBC Driver 17 for SQL Server")
63
- SQL_SERVER = os.getenv("PYMATCH_SQL_SERVER", r"localhost\sqlexpress")
64
  SQL_DB = os.getenv("PYMATCH_SQL_DB", "Py_Match")
65
  SQL_TRUSTED = os.getenv("PYMATCH_SQL_TRUSTED", "yes").lower()
66
 
 
60
 
61
  # Read settings from environment variables
62
  SQL_DRIVER = os.getenv("PYMATCH_SQL_DRIVER", "ODBC Driver 17 for SQL Server")
63
+ SQL_SERVER = os.getenv("PYMATCH_SQL_SERVER", r"PYKARA")
64
  SQL_DB = os.getenv("PYMATCH_SQL_DB", "Py_Match")
65
  SQL_TRUSTED = os.getenv("PYMATCH_SQL_TRUSTED", "yes").lower()
66
 
routes/matching_routes.py CHANGED
@@ -333,4 +333,101 @@ def unified_match(user_id=None):
333
  "matches": matches_by_range,
334
  "count": len(expectation_matches),
335
  "mode": "expectation"
336
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  "matches": matches_by_range,
334
  "count": len(expectation_matches),
335
  "mode": "expectation"
336
+ })
337
+
338
+
339
@matching_bp.get("/compatibility-explanation")
def get_compatibility_explanation():
    """Return human-readable compatibility explanations for a pair of users.

    Query params:
        user_id (int, required): the requesting user.
        target_user_id (int, required): the candidate match.
        mode (str): "expectation-only" (rule-based), "character" (LLM-only),
            or "expectation" (rule-based + LLM character insights).

    Returns:
        JSON with ``mode``, ``explanations`` (list of strings) and ``source``;
        400 on missing params or invalid mode, 500 on unexpected failure.
    """
    user_id = request.args.get("user_id", type=int)
    target_user_id = request.args.get("target_user_id", type=int)
    mode = request.args.get("mode", "expectation-only")

    if not user_id or not target_user_id:
        return jsonify({"error": "user_id and target_user_id are required"}), 400

    try:
        # TAB 1 → EXPECTATION ONLY (Rule-based)
        if mode == "expectation-only":
            exp_user = fetch_expectation_data(user_id)
            profile_user = fetch_marriage_profile_data(target_user_id)

            explanations = generate_expectation_explanation(exp_user, profile_user)

            return jsonify({
                "mode": "expectation-only",
                "explanations": explanations,
                "source": "expectation-fallback"
            })

        # TAB 2 → CHARACTER ONLY (LLM-ONLY)
        elif mode == "character":
            llm1 = LLMGeneratedQuestions.query.filter_by(user_id=user_id).first()
            llm2 = LLMGeneratedQuestions.query.filter_by(user_id=target_user_id).first()

            if not (llm1 and llm2):
                return jsonify({
                    "mode": "character",
                    "explanations": [
                        "Character analysis unavailable - no personality data found for one or both users."
                    ],
                    "source": "error"
                })

            u_vec = llm1.color_vec()
            v_vec = llm2.color_vec()

            print(f"🎯 Generating AI character analysis for users {user_id} and {target_user_id}...")
            character_explanations = generate_character_llm_explanation(u_vec, v_vec)

            return jsonify({
                "mode": "character",
                "explanations": character_explanations,
                "source": "character-llm"
            })

        # TAB 3 → EXPECTATION + CHARACTER (Mixed)
        elif mode == "expectation":
            exp_user = fetch_expectation_data(user_id)
            profile_user = fetch_marriage_profile_data(target_user_id)

            expectation_part = generate_expectation_explanation(exp_user, profile_user)

            llm1 = LLMGeneratedQuestions.query.filter_by(user_id=user_id).first()
            llm2 = LLMGeneratedQuestions.query.filter_by(user_id=target_user_id).first()

            character_explanations = []
            source_type = "error"
            u_vec = v_vec = None

            if llm1 and llm2:
                # Build the color vectors in their own guarded step: previously
                # they were assigned inside the LLM try-block, so a failure in
                # color_vec() made the fallback handler reference undefined
                # u_vec/v_vec and raise NameError.
                try:
                    u_vec = llm1.color_vec()
                    v_vec = llm2.color_vec()
                except Exception as e:
                    print(f"🔴 Could not build color vectors: {e}")

            if u_vec is not None and v_vec is not None:
                try:
                    character_explanations = generate_character_llm_explanation(u_vec, v_vec)
                    source_type = "character-llm"
                except Exception as e:
                    print(f"🔴 LLM failed, using backend fallback: {e}")
                    character_explanations = generate_character_fallback_explanation(u_vec, v_vec)
                    source_type = "character-fallback"
            else:
                character_explanations = ["Character analysis unavailable for this user."]

            final_output = expectation_part + ["", "🧠 **AI Character Insights**"] + character_explanations

            return jsonify({
                "mode": "expectation",
                "explanations": final_output,
                "source": source_type
            })

        else:
            return jsonify({"error": "Invalid mode"}), 400

    except Exception as e:
        # Top-level boundary: surface a friendly message instead of a stack trace.
        print(f"🔴 Error in compatibility explanation: {e}")
        return jsonify({
            "explanations": [f"❌ Service temporarily unavailable: {str(e)}"],
            "source": "error"
        }), 500
430
+
431
+
432
+
433
+