wop committed on
Commit
976e888
·
verified ·
1 Parent(s): 7a1ea71

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +97 -115
main.py CHANGED
@@ -14,8 +14,8 @@ from fastapi.templating import Jinja2Templates
14
 
15
  try:
16
  from sentence_transformers import SentenceTransformer
17
- except Exception: # pragma: no cover
18
- SentenceTransformer = None # type: ignore
19
 
20
 
21
  APP_TITLE = "Human Intelligence"
@@ -24,10 +24,10 @@ CONVERSATIONS_FILE = DATA_DIR / "conversations.json"
24
  EMBED_FILE = DATA_DIR / "embeddings.json"
25
  TEMPLATES_DIR = Path("/app/templates")
26
 
27
- SIMILARITY_THRESHOLD = float(os.environ.get("SIMILARITY_THRESHOLD", "0.78"))
28
  EMBED_MODEL_NAME = os.environ.get(
29
  "EMBED_MODEL_NAME",
30
- "sentence-transformers/paraphrase-MiniLM-L3-v2",
31
  )
32
 
33
  DATA_DIR.mkdir(parents=True, exist_ok=True)
@@ -39,6 +39,8 @@ templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
39
  _embed_model = None
40
 
41
 
 
 
42
  def now_iso() -> str:
43
  return datetime.now(timezone.utc).isoformat(timespec="seconds")
44
 
@@ -65,21 +67,19 @@ def get_client_id(request: Request, payload: dict | None = None) -> str:
65
  header_value = request.headers.get("x-client-id", "").strip()
66
  if header_value:
67
  return header_value
68
-
69
  if payload:
70
  payload_value = str(payload.get("client_id", "")).strip()
71
  if payload_value:
72
  return payload_value
73
-
74
  return "anon"
75
 
76
 
77
  def anon_label(client_id: str) -> str:
78
- if not client_id or client_id == "anon":
79
- return "Anonymous"
80
  return "Anonymous"
81
 
82
 
 
 
83
  def load_embed_model():
84
  global _embed_model
85
  if _embed_model is None:
@@ -108,6 +108,8 @@ def save_embed_index(idx: dict[str, dict[str, Any]]) -> None:
108
  write_json(EMBED_FILE, idx)
109
 
110
 
 
 
111
  def load_conversations() -> list[dict[str, Any]]:
112
  data = read_json(CONVERSATIONS_FILE, [])
113
  if isinstance(data, dict) and "conversations" in data:
@@ -155,7 +157,6 @@ def normalize_answer(answer: dict[str, Any]) -> dict[str, Any]:
155
  versions,
156
  key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
157
  )["id"]
158
-
159
  return a
160
 
161
 
@@ -189,16 +190,13 @@ def normalize_conversation(conversation: dict[str, Any]) -> dict[str, Any]:
189
  ]
190
 
191
  if not c["turns"] and c["question"]:
192
- c["turns"].append(
193
- {
194
- "id": uuid.uuid4().hex,
195
- "role": "user",
196
- "text": c["question"],
197
- "author": c.get("author", "Anonymous"),
198
- "ts": c["created_at"],
199
- }
200
- )
201
-
202
  return c
203
 
204
 
@@ -233,11 +231,9 @@ def ensure_embedding(conversation: dict[str, Any]) -> None:
233
  question = str(conversation.get("question", "")).strip()
234
  if not question:
235
  return
236
-
237
  vec = embed_text(question)
238
  if not vec:
239
  return
240
-
241
  idx = load_embed_index()
242
  idx[str(conversation["id"])] = {
243
  "question": question,
@@ -246,7 +242,12 @@ def ensure_embedding(conversation: dict[str, Any]) -> None:
246
  save_embed_index(idx)
247
 
248
 
249
- def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
 
 
 
 
 
250
  idx = load_embed_index()
251
  if not idx or SentenceTransformer is None:
252
  return None
@@ -255,7 +256,10 @@ def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
255
  if q_vec.size == 0:
256
  return None
257
 
258
- ids = list(idx.keys())
 
 
 
259
  try:
260
  vecs = np.array([idx[cid]["vector"] for cid in ids], dtype=float)
261
  except Exception:
@@ -277,11 +281,10 @@ def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
277
  if conv is None:
278
  return None
279
 
280
- return {
281
- "conversation": conv,
282
- "score": score,
283
- }
284
 
 
285
 
286
  def create_conversation(question: str, author: str = "Anonymous") -> dict[str, Any]:
287
  question = question.strip()
@@ -292,15 +295,13 @@ def create_conversation(question: str, author: str = "Anonymous") -> dict[str, A
292
  "author": author,
293
  "created_at": now,
294
  "updated_at": now,
295
- "turns": [
296
- {
297
- "id": uuid.uuid4().hex,
298
- "role": "user",
299
- "text": question,
300
- "author": author,
301
- "ts": now,
302
- }
303
- ],
304
  "answers": [],
305
  }
306
  conversation = save_conversation(conversation)
@@ -312,12 +313,10 @@ def active_version(answer: dict[str, Any]) -> Optional[dict[str, Any]]:
312
  versions = answer.get("versions", [])
313
  if not versions:
314
  return None
315
-
316
  active_id = answer.get("active_version")
317
  for version in versions:
318
  if version.get("id") == active_id:
319
  return version
320
-
321
  return max(
322
  versions,
323
  key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
@@ -335,12 +334,10 @@ def best_answer_payload(conversation: dict[str, Any]) -> Optional[dict[str, Any]
335
  answers = conversation.get("answers", [])
336
  if not answers:
337
  return None
338
-
339
  best = max(answers, key=answer_score)
340
  av = active_version(best)
341
  if av is None:
342
  return None
343
-
344
  return {
345
  "answer_id": best["id"],
346
  "version_id": av["id"],
@@ -368,37 +365,31 @@ def add_answer(
368
  conversation = create_conversation(question_if_new, author)
369
 
370
  now = now_iso()
371
- version = normalize_version(
372
- {
373
- "text": text,
374
- "author": author,
375
- "created_at": now,
376
- "votes": 0,
377
- "votes_by_client": {},
378
- }
379
- )
380
- answer = normalize_answer(
381
- {
382
- "id": uuid.uuid4().hex,
383
- "versions": [version],
384
- "active_version": version["id"],
385
- "created_at": now,
386
- "updated_at": now,
387
- }
388
- )
389
 
390
  conversation["answers"].append(answer)
391
- conversation["turns"].append(
392
- {
393
- "id": uuid.uuid4().hex,
394
- "role": "assistant",
395
- "text": text,
396
- "author": author,
397
- "answer_id": answer["id"],
398
- "version_id": version["id"],
399
- "ts": now,
400
- }
401
- )
402
 
403
  save_conversation(conversation)
404
  return conversation, "ok"
@@ -423,15 +414,13 @@ def propose_version(
423
  continue
424
 
425
  now = now_iso()
426
- version = normalize_version(
427
- {
428
- "text": text,
429
- "author": author,
430
- "created_at": now,
431
- "votes": 0,
432
- "votes_by_client": {},
433
- }
434
- )
435
  answer["versions"].append(version)
436
  answer["updated_at"] = now
437
  save_conversation(conversation)
@@ -456,7 +445,6 @@ def vote_version(
456
  for answer in conversation["answers"]:
457
  if str(answer.get("id")) != answer_id:
458
  continue
459
-
460
  for version in answer.get("versions", []):
461
  if str(version.get("id")) != version_id:
462
  continue
@@ -486,6 +474,8 @@ def vote_version(
486
  return None, "version not found"
487
 
488
 
 
 
489
  @app.get("/", response_class=HTMLResponse)
490
  def home(request: Request):
491
  init = {
@@ -519,17 +509,17 @@ async def api(request: Request):
519
  client_id = get_client_id(request, payload)
520
  author = anon_label(client_id)
521
 
 
522
  if action == "init":
523
  conversation_id = str(payload.get("conversation_id", "")).strip()
524
  conversation = load_conversation(conversation_id) if conversation_id else None
525
- return JSONResponse(
526
- {
527
- "ok": True,
528
- "client_id": client_id,
529
- "conversation": conversation,
530
- }
531
- )
532
 
 
533
  if action == "get_conversation":
534
  conversation_id = str(payload.get("conversation_id", "")).strip()
535
  conversation = load_conversation(conversation_id)
@@ -537,42 +527,37 @@ async def api(request: Request):
537
  return JSONResponse({"ok": False, "error": "not found"})
538
  return JSONResponse({"ok": True, "conversation": conversation})
539
 
 
540
  if action == "ask":
541
  question = str(payload.get("question", "")).strip()
542
  if not question:
543
  return JSONResponse({"ok": False, "error": "empty question"})
544
 
 
545
  match = find_similar_conversation(question)
546
  if match and match.get("conversation"):
547
  conversation = match["conversation"]
548
  best = best_answer_payload(conversation)
549
- assistant_text = (
550
- best["text"]
551
- if best is not None
552
- else "No answer yet. You can write one."
553
- )
554
- return JSONResponse(
555
- {
556
- "ok": True,
557
- "matched": True,
558
- "similarity": match["score"],
559
- "conversation": conversation,
560
- "assistant_text": assistant_text,
561
- "best_answer": best,
562
- }
563
- )
564
-
565
- conversation = create_conversation(question, author)
566
- return JSONResponse(
567
- {
568
  "ok": True,
569
- "matched": False,
 
570
  "conversation": conversation,
571
- "assistant_text": "No answer yet. You can write one.",
572
- "best_answer": None,
573
- }
574
- )
575
 
 
 
 
 
 
 
 
 
 
 
 
576
  if action == "answer":
577
  conversation_id = str(payload.get("conversation_id", "")).strip()
578
  text = str(payload.get("text", "")).strip()
@@ -586,9 +571,9 @@ async def api(request: Request):
586
  )
587
  if conversation is None:
588
  return JSONResponse({"ok": False, "error": msg})
589
-
590
  return JSONResponse({"ok": True, "conversation": conversation})
591
 
 
592
  if action == "propose":
593
  conversation_id = str(payload.get("conversation_id", "")).strip()
594
  answer_id = str(payload.get("answer_id", "")).strip()
@@ -602,9 +587,9 @@ async def api(request: Request):
602
  )
603
  if conversation is None:
604
  return JSONResponse({"ok": False, "error": msg})
605
-
606
  return JSONResponse({"ok": True, "conversation": conversation})
607
 
 
608
  if action == "vote":
609
  conversation_id = str(payload.get("conversation_id", "")).strip()
610
  answer_id = str(payload.get("answer_id", "")).strip()
@@ -620,10 +605,8 @@ async def api(request: Request):
620
  )
621
  if conversation is None:
622
  return JSONResponse({"ok": False, "error": msg})
623
-
624
  if msg == "already_voted":
625
  return JSONResponse({"ok": False, "error": "already voted"})
626
-
627
  return JSONResponse({"ok": True, "conversation": conversation})
628
 
629
  return JSONResponse({"ok": False, "error": f"unknown action: {action}"})
@@ -631,5 +614,4 @@ async def api(request: Request):
631
 
632
  if __name__ == "__main__":
633
  import uvicorn
634
-
635
  uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)
 
14
 
15
  try:
16
  from sentence_transformers import SentenceTransformer
17
+ except Exception:
18
+ SentenceTransformer = None
19
 
20
 
21
  APP_TITLE = "Human Intelligence"
 
24
  EMBED_FILE = DATA_DIR / "embeddings.json"
25
  TEMPLATES_DIR = Path("/app/templates")
26
 
27
+ SIMILARITY_THRESHOLD = float(os.environ.get("SIMILARITY_THRESHOLD", "0.62"))
28
  EMBED_MODEL_NAME = os.environ.get(
29
  "EMBED_MODEL_NAME",
30
+ "sentence-transformers/paraphrase-MiniLM-L6-v2",
31
  )
32
 
33
  DATA_DIR.mkdir(parents=True, exist_ok=True)
 
39
  _embed_model = None
40
 
41
 
42
+ # ────────────────────── Utilities ──────────────────────
43
+
44
  def now_iso() -> str:
45
  return datetime.now(timezone.utc).isoformat(timespec="seconds")
46
 
 
67
  header_value = request.headers.get("x-client-id", "").strip()
68
  if header_value:
69
  return header_value
 
70
  if payload:
71
  payload_value = str(payload.get("client_id", "")).strip()
72
  if payload_value:
73
  return payload_value
 
74
  return "anon"
75
 
76
 
77
  def anon_label(client_id: str) -> str:
 
 
78
  return "Anonymous"
79
 
80
 
81
+ # ────────────────────── Embeddings ──────────────────────
82
+
83
  def load_embed_model():
84
  global _embed_model
85
  if _embed_model is None:
 
108
  write_json(EMBED_FILE, idx)
109
 
110
 
111
+ # ────────────────────── Conversations CRUD ──────────────────────
112
+
113
  def load_conversations() -> list[dict[str, Any]]:
114
  data = read_json(CONVERSATIONS_FILE, [])
115
  if isinstance(data, dict) and "conversations" in data:
 
157
  versions,
158
  key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
159
  )["id"]
 
160
  return a
161
 
162
 
 
190
  ]
191
 
192
  if not c["turns"] and c["question"]:
193
+ c["turns"].append({
194
+ "id": uuid.uuid4().hex,
195
+ "role": "user",
196
+ "text": c["question"],
197
+ "author": c.get("author", "Anonymous"),
198
+ "ts": c["created_at"],
199
+ })
 
 
 
200
  return c
201
 
202
 
 
231
  question = str(conversation.get("question", "")).strip()
232
  if not question:
233
  return
 
234
  vec = embed_text(question)
235
  if not vec:
236
  return
 
237
  idx = load_embed_index()
238
  idx[str(conversation["id"])] = {
239
  "question": question,
 
242
  save_embed_index(idx)
243
 
244
 
245
+ # ────────────────────── Semantic search ──────────────────────
246
+
247
+ def find_similar_conversation(
248
+ question: str,
249
+ exclude_id: str | None = None,
250
+ ) -> Optional[dict[str, Any]]:
251
  idx = load_embed_index()
252
  if not idx or SentenceTransformer is None:
253
  return None
 
256
  if q_vec.size == 0:
257
  return None
258
 
259
+ ids = [cid for cid in idx if cid != exclude_id]
260
+ if not ids:
261
+ return None
262
+
263
  try:
264
  vecs = np.array([idx[cid]["vector"] for cid in ids], dtype=float)
265
  except Exception:
 
281
  if conv is None:
282
  return None
283
 
284
+ return {"conversation": conv, "score": score}
285
+
 
 
286
 
287
+ # ────────────────────── Actions ──────────────────────
288
 
289
  def create_conversation(question: str, author: str = "Anonymous") -> dict[str, Any]:
290
  question = question.strip()
 
295
  "author": author,
296
  "created_at": now,
297
  "updated_at": now,
298
+ "turns": [{
299
+ "id": uuid.uuid4().hex,
300
+ "role": "user",
301
+ "text": question,
302
+ "author": author,
303
+ "ts": now,
304
+ }],
 
 
305
  "answers": [],
306
  }
307
  conversation = save_conversation(conversation)
 
313
  versions = answer.get("versions", [])
314
  if not versions:
315
  return None
 
316
  active_id = answer.get("active_version")
317
  for version in versions:
318
  if version.get("id") == active_id:
319
  return version
 
320
  return max(
321
  versions,
322
  key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
 
334
  answers = conversation.get("answers", [])
335
  if not answers:
336
  return None
 
337
  best = max(answers, key=answer_score)
338
  av = active_version(best)
339
  if av is None:
340
  return None
 
341
  return {
342
  "answer_id": best["id"],
343
  "version_id": av["id"],
 
365
  conversation = create_conversation(question_if_new, author)
366
 
367
  now = now_iso()
368
+ version = normalize_version({
369
+ "text": text,
370
+ "author": author,
371
+ "created_at": now,
372
+ "votes": 0,
373
+ "votes_by_client": {},
374
+ })
375
+ answer = normalize_answer({
376
+ "id": uuid.uuid4().hex,
377
+ "versions": [version],
378
+ "active_version": version["id"],
379
+ "created_at": now,
380
+ "updated_at": now,
381
+ })
 
 
 
 
382
 
383
  conversation["answers"].append(answer)
384
+ conversation["turns"].append({
385
+ "id": uuid.uuid4().hex,
386
+ "role": "assistant",
387
+ "text": text,
388
+ "author": author,
389
+ "answer_id": answer["id"],
390
+ "version_id": version["id"],
391
+ "ts": now,
392
+ })
 
 
393
 
394
  save_conversation(conversation)
395
  return conversation, "ok"
 
414
  continue
415
 
416
  now = now_iso()
417
+ version = normalize_version({
418
+ "text": text,
419
+ "author": author,
420
+ "created_at": now,
421
+ "votes": 0,
422
+ "votes_by_client": {},
423
+ })
 
 
424
  answer["versions"].append(version)
425
  answer["updated_at"] = now
426
  save_conversation(conversation)
 
445
  for answer in conversation["answers"]:
446
  if str(answer.get("id")) != answer_id:
447
  continue
 
448
  for version in answer.get("versions", []):
449
  if str(version.get("id")) != version_id:
450
  continue
 
474
  return None, "version not found"
475
 
476
 
477
+ # ────────────────────── Routes ──────────────────────
478
+
479
  @app.get("/", response_class=HTMLResponse)
480
  def home(request: Request):
481
  init = {
 
509
  client_id = get_client_id(request, payload)
510
  author = anon_label(client_id)
511
 
512
+ # ── init ──
513
  if action == "init":
514
  conversation_id = str(payload.get("conversation_id", "")).strip()
515
  conversation = load_conversation(conversation_id) if conversation_id else None
516
+ return JSONResponse({
517
+ "ok": True,
518
+ "client_id": client_id,
519
+ "conversation": conversation,
520
+ })
 
 
521
 
522
+ # ── get_conversation ──
523
  if action == "get_conversation":
524
  conversation_id = str(payload.get("conversation_id", "")).strip()
525
  conversation = load_conversation(conversation_id)
 
527
  return JSONResponse({"ok": False, "error": "not found"})
528
  return JSONResponse({"ok": True, "conversation": conversation})
529
 
530
+ # ── ask ──
531
  if action == "ask":
532
  question = str(payload.get("question", "")).strip()
533
  if not question:
534
  return JSONResponse({"ok": False, "error": "empty question"})
535
 
536
+ # 1) Search FIRST — before creating anything
537
  match = find_similar_conversation(question)
538
  if match and match.get("conversation"):
539
  conversation = match["conversation"]
540
  best = best_answer_payload(conversation)
541
+ return JSONResponse({
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  "ok": True,
543
+ "matched": True,
544
+ "similarity": match["score"],
545
  "conversation": conversation,
546
+ "assistant_text": best["text"] if best else "No answer yet. You can write one.",
547
+ "best_answer": best,
548
+ })
 
549
 
550
+ # 2) No match — create new
551
+ conversation = create_conversation(question, author)
552
+ return JSONResponse({
553
+ "ok": True,
554
+ "matched": False,
555
+ "conversation": conversation,
556
+ "assistant_text": "No answer yet. You can write one.",
557
+ "best_answer": None,
558
+ })
559
+
560
+ # ── answer ──
561
  if action == "answer":
562
  conversation_id = str(payload.get("conversation_id", "")).strip()
563
  text = str(payload.get("text", "")).strip()
 
571
  )
572
  if conversation is None:
573
  return JSONResponse({"ok": False, "error": msg})
 
574
  return JSONResponse({"ok": True, "conversation": conversation})
575
 
576
+ # ── propose ──
577
  if action == "propose":
578
  conversation_id = str(payload.get("conversation_id", "")).strip()
579
  answer_id = str(payload.get("answer_id", "")).strip()
 
587
  )
588
  if conversation is None:
589
  return JSONResponse({"ok": False, "error": msg})
 
590
  return JSONResponse({"ok": True, "conversation": conversation})
591
 
592
+ # ── vote ──
593
  if action == "vote":
594
  conversation_id = str(payload.get("conversation_id", "")).strip()
595
  answer_id = str(payload.get("answer_id", "")).strip()
 
605
  )
606
  if conversation is None:
607
  return JSONResponse({"ok": False, "error": msg})
 
608
  if msg == "already_voted":
609
  return JSONResponse({"ok": False, "error": "already voted"})
 
610
  return JSONResponse({"ok": True, "conversation": conversation})
611
 
612
  return JSONResponse({"ok": False, "error": f"unknown action: {action}"})
 
614
 
615
  if __name__ == "__main__":
616
  import uvicorn
 
617
  uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)