wop committed on
Commit
ab9b30e
·
verified ·
1 Parent(s): 5c2cf3a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +456 -293
main.py CHANGED
@@ -10,48 +10,37 @@ from typing import Any, Optional
10
  import numpy as np
11
  from fastapi import FastAPI, Request
12
  from fastapi.responses import HTMLResponse, JSONResponse
13
- from fastapi.staticfiles import StaticFiles
14
  from fastapi.templating import Jinja2Templates
15
 
16
- # Optional but recommended for similarity search.
17
- # If sentence-transformers is unavailable, the app still works with a fallback.
18
  try:
19
  from sentence_transformers import SentenceTransformer
20
  except Exception: # pragma: no cover
21
  SentenceTransformer = None # type: ignore
22
 
23
- try:
24
- from sklearn.metrics.pairwise import cosine_similarity
25
- except Exception: # pragma: no cover
26
- cosine_similarity = None # type: ignore
27
-
28
 
29
  APP_TITLE = "Human Intelligence"
30
  DATA_DIR = Path(os.environ.get("DATA_DIR", "/data"))
31
- THREADS_DIR = DATA_DIR / "threads"
32
- INDEX_FILE = DATA_DIR / "index.json"
33
  EMBED_FILE = DATA_DIR / "embeddings.json"
34
  TEMPLATES_DIR = Path("/app/templates")
35
- SIMILARITY_THRESHOLD = float(os.environ.get("SIMILARITY_THRESHOLD", "0.82"))
 
 
 
 
 
36
 
37
  DATA_DIR.mkdir(parents=True, exist_ok=True)
38
- THREADS_DIR.mkdir(parents=True, exist_ok=True)
39
  TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
40
 
41
  app = FastAPI(title=APP_TITLE)
42
  templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
43
 
44
- # Serve any public assets you may add later.
45
- app.mount("/data", StaticFiles(directory=str(DATA_DIR)), name="data")
46
-
47
  _embed_model = None
48
 
49
 
50
- # ---------------------------------------------------------------------
51
- # Utilities
52
- # ---------------------------------------------------------------------
53
  def now_iso() -> str:
54
- return datetime.now(timezone.utc).isoformat()
55
 
56
 
57
  def read_json(path: Path, default: Any):
@@ -65,39 +54,38 @@ def read_json(path: Path, default: Any):
65
 
66
  def write_json(path: Path, data: Any) -> None:
67
  tmp = path.with_suffix(path.suffix + ".tmp")
68
- tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2, default=str), encoding="utf-8")
 
 
 
69
  tmp.replace(path)
70
 
71
 
72
- def get_username(request: Request, payload: dict | None = None) -> str:
73
- """
74
- Priority:
75
- 1) X-User header
76
- 2) JSON payload username
77
- 3) guest
78
- This keeps the app simple and deployment-friendly.
79
- """
80
- header_name = request.headers.get("x-user", "").strip()
81
- if header_name:
82
- return header_name
83
 
84
  if payload:
85
- p_name = str(payload.get("username", "")).strip()
86
- if p_name:
87
- return p_name
 
 
 
88
 
89
- return "Guest"
 
 
 
90
 
91
 
92
- # ---------------------------------------------------------------------
93
- # Embeddings / similarity
94
- # ---------------------------------------------------------------------
95
  def load_embed_model():
96
  global _embed_model
97
  if _embed_model is None:
98
  if SentenceTransformer is None:
99
  return None
100
- _embed_model = SentenceTransformer("all-MiniLM-L6-v2")
101
  return _embed_model
102
 
103
 
@@ -105,282 +93,419 @@ def embed_text(text: str) -> list[float]:
105
  model = load_embed_model()
106
  if model is None:
107
  return []
108
-
109
  vec = model.encode(text, normalize_embeddings=True)
110
- return vec.tolist()
 
 
111
 
112
 
113
- def load_embed_index() -> dict:
114
- return read_json(EMBED_FILE, {})
 
115
 
116
 
117
- def save_embed_index(idx: dict) -> None:
118
  write_json(EMBED_FILE, idx)
119
 
120
 
121
- def find_similar_thread(question: str) -> Optional[tuple[str, float]]:
122
- idx = load_embed_index()
123
- if not idx:
124
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- if cosine_similarity is None:
127
- return None
128
 
129
- q_vec = embed_text(question)
130
- if not q_vec:
131
- return None
132
 
133
- ids = list(idx.keys())
134
- vecs = np.array([idx[tid]["vector"] for tid in ids], dtype=float)
135
-
136
- if vecs.size == 0:
137
  return None
138
-
139
- sims = cosine_similarity(np.array(q_vec, dtype=float).reshape(1, -1), vecs)[0]
140
- best_i = int(np.argmax(sims))
141
- score = float(sims[best_i])
142
-
143
- if score >= SIMILARITY_THRESHOLD:
144
- return ids[best_i], score
145
  return None
146
 
147
 
148
- # ---------------------------------------------------------------------
149
- # Storage
150
- # ---------------------------------------------------------------------
151
- def load_index() -> list[dict]:
152
- return read_json(INDEX_FILE, [])
153
-
154
 
155
- def save_index(idx: list[dict]) -> None:
156
- write_json(INDEX_FILE, idx)
157
-
158
-
159
- def thread_path(tid: str) -> Path:
160
- return THREADS_DIR / f"{tid}.json"
 
 
 
161
 
 
 
162
 
163
- def load_thread(tid: str) -> Optional[dict]:
164
- return read_json(thread_path(tid), None)
165
 
 
 
 
 
166
 
167
- def save_thread(thread: dict) -> None:
168
- write_json(thread_path(thread["id"]), thread)
 
169
 
 
 
 
 
 
 
170
 
171
- def ensure_thread_schema(thread: dict) -> dict:
172
- thread.setdefault("id", uuid.uuid4().hex)
173
- thread.setdefault("question", "")
174
- thread.setdefault("author", "Guest")
175
- thread.setdefault("created_at", now_iso())
176
- thread.setdefault("messages", [])
177
- return thread
178
 
 
 
 
 
179
 
180
- # ---------------------------------------------------------------------
181
- # Content safety
182
- # ---------------------------------------------------------------------
183
- _toxic_pipe = None
184
 
185
- def load_safety_pipe():
186
- global _toxic_pipe
187
- if _toxic_pipe is None:
188
- try:
189
- from transformers import pipeline
190
- _toxic_pipe = pipeline(
191
- "text-classification",
192
- model="unitary/toxic-bert",
193
- device=-1,
194
- top_k=None,
195
- )
196
- except Exception:
197
- _toxic_pipe = False
198
- return _toxic_pipe
199
 
 
 
 
 
200
 
201
- def is_safe(text: str) -> tuple[bool, str]:
202
- if not text or not text.strip():
203
- return False, "empty"
204
 
205
- pipe = load_safety_pipe()
206
- if pipe is False:
207
- return True, "ok"
208
 
209
- try:
210
- results = pipe(text[:512])[0]
211
- for r in results:
212
- label = str(r.get("label", "")).lower()
213
- score = float(r.get("score", 0.0))
214
- if label != "non-toxic" and score > 0.70:
215
- return False, label or "unsafe"
216
- return True, "ok"
217
- except Exception:
218
- return True, "ok"
219
 
 
 
 
 
220
 
221
- # ---------------------------------------------------------------------
222
- # Core operations
223
- # ---------------------------------------------------------------------
224
- def create_thread(question: str, author: str) -> tuple[Optional[dict], str]:
225
- ok, reason = is_safe(question)
226
- if not ok:
227
- return None, f"Blocked: content flagged as {reason}."
228
 
229
- tid = uuid.uuid4().hex
230
- thread = {
231
- "id": tid,
 
 
232
  "question": question,
233
  "author": author,
234
- "created_at": now_iso(),
235
- "messages": [],
 
 
 
 
 
 
 
 
 
 
236
  }
237
- save_thread(thread)
 
 
238
 
239
- idx = load_index()
240
- idx.insert(
241
- 0,
242
- {
243
- "id": tid,
244
- "title": question[:120],
245
- "created_at": thread["created_at"],
246
- "author": author,
247
- "reply_count": 0,
248
- },
 
 
 
 
249
  )
250
- save_index(idx)
251
 
252
- emb_idx = load_embed_index()
253
- vec = embed_text(question)
254
- if vec:
255
- emb_idx[tid] = {"question": question, "vector": vec}
256
- save_embed_index(emb_idx)
257
 
258
- return thread, "ok"
 
 
 
 
259
 
260
 
261
- def add_answer(tid: str, text: str, author: str) -> tuple[Optional[dict], str]:
262
- ok, reason = is_safe(text)
263
- if not ok:
264
- return None, f"Blocked: content flagged as {reason}."
265
 
266
- thread = load_thread(tid)
267
- if thread is None:
268
- return None, "Thread not found."
 
269
 
270
- version = {
271
- "id": uuid.uuid4().hex,
272
- "text": text,
273
- "author": author,
274
- "created_at": now_iso(),
275
- "votes": 0,
276
- "voters": [],
277
- }
278
- message = {
279
- "id": uuid.uuid4().hex,
280
- "versions": [version],
281
- "active_version": version["id"],
282
- "created_at": version["created_at"],
283
  }
284
- thread["messages"].append(message)
285
- save_thread(thread)
286
 
287
- idx = load_index()
288
- for entry in idx:
289
- if entry["id"] == tid:
290
- entry["reply_count"] = len(thread["messages"])
291
- break
292
- save_index(idx)
293
 
294
- return thread, "ok"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
- def propose_version(tid: str, msg_id: str, text: str, author: str) -> tuple[Optional[dict], str]:
298
- ok, reason = is_safe(text)
299
- if not ok:
300
- return None, f"Blocked: content flagged as {reason}."
301
 
302
- thread = load_thread(tid)
303
- if thread is None:
304
- return None, "Thread not found."
305
 
306
- for msg in thread["messages"]:
307
- if msg["id"] == msg_id:
308
- version = {
309
- "id": uuid.uuid4().hex,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  "text": text,
311
  "author": author,
312
- "created_at": now_iso(),
313
  "votes": 0,
314
- "voters": [],
315
  }
316
- msg["versions"].append(version)
317
- save_thread(thread)
318
- return thread, "ok"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
- return None, "Message not found."
321
 
 
 
 
322
 
323
- def vote_version(tid: str, msg_id: str, version_id: str, username: str) -> tuple[Optional[dict], str]:
324
- thread = load_thread(tid)
325
- if thread is None:
326
- return None, "Thread not found."
327
 
328
- for msg in thread["messages"]:
329
- if msg["id"] == msg_id:
330
- for v in msg["versions"]:
331
- if v["id"] == version_id:
332
- if username in v["voters"]:
333
- return thread, "already_voted"
334
- v["votes"] += 1
335
- v["voters"].append(username)
336
- break
337
 
338
- msg["active_version"] = max(msg["versions"], key=lambda x: x["votes"])["id"]
339
- save_thread(thread)
340
- return thread, "ok"
341
 
342
- return None, "Message not found."
 
343
 
 
 
 
 
 
344
 
345
- def get_active_version(msg: dict) -> dict:
346
- vid = msg.get("active_version")
347
- for v in msg.get("versions", []):
348
- if v.get("id") == vid:
349
- return v
350
- return msg.get("versions", [{}])[0]
351
 
352
 
353
- # ---------------------------------------------------------------------
354
- # API
355
- # ---------------------------------------------------------------------
356
  @app.get("/", response_class=HTMLResponse)
357
  def home(request: Request):
 
 
 
 
 
358
  return templates.TemplateResponse(
359
  "index.html",
360
  {
361
  "request": request,
362
  "app_title": APP_TITLE,
363
- "init_json": json.dumps(
364
- {
365
- "ok": True,
366
- "username": get_username(request),
367
- "threads": load_index(),
368
- },
369
- ensure_ascii=False,
370
- ),
371
  },
372
  )
373
 
374
 
375
- @app.get("/api/init")
376
- def api_init(request: Request):
377
- return JSONResponse(
378
- {
379
- "ok": True,
380
- "username": get_username(request),
381
- "threads": load_index(),
382
- }
383
- )
384
 
385
 
386
  @app.post("/api")
@@ -391,82 +516,120 @@ async def api(request: Request):
391
  return JSONResponse({"ok": False, "error": "bad payload"})
392
 
393
  action = str(payload.get("action", ""))
394
- username = get_username(request, payload)
 
395
 
396
  if action == "init":
397
- return JSONResponse({"ok": True, "username": username, "threads": load_index()})
398
-
399
- if action == "list_threads":
400
- return JSONResponse({"ok": True, "threads": load_index()})
 
 
 
 
 
401
 
402
- if action == "get_thread":
403
- tid = str(payload.get("thread_id", ""))
404
- thread = load_thread(tid)
405
- if thread is None:
406
  return JSONResponse({"ok": False, "error": "not found"})
407
- return JSONResponse({"ok": True, "thread": thread})
408
 
409
- if action == "new_thread":
410
- if not username or username == "Guest":
411
- return JSONResponse({"ok": False, "error": "not signed in"})
412
  question = str(payload.get("question", "")).strip()
413
  if not question:
414
  return JSONResponse({"ok": False, "error": "empty question"})
415
- sim = find_similar_thread(question)
416
- thread, msg = create_thread(question, username)
417
- if thread is None:
418
- return JSONResponse({"ok": False, "error": msg})
419
- return JSONResponse({"ok": True, "thread": thread, "similar": sim is not None})
420
 
421
- if action == "add_answer":
422
- if not username or username == "Guest":
423
- return JSONResponse({"ok": False, "error": "not signed in"})
424
- tid = str(payload.get("thread_id", ""))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  text = str(payload.get("text", "")).strip()
426
- if not text:
427
- return JSONResponse({"ok": False, "error": "empty answer"})
428
- thread, msg = add_answer(tid, text, username)
429
- if thread is None:
 
 
 
 
 
430
  return JSONResponse({"ok": False, "error": msg})
431
- return JSONResponse({"ok": True, "thread": thread})
 
432
 
433
  if action == "propose":
434
- if not username or username == "Guest":
435
- return JSONResponse({"ok": False, "error": "not signed in"})
436
- tid = str(payload.get("thread_id", ""))
437
- msg_id = str(payload.get("msg_id", ""))
438
  text = str(payload.get("text", "")).strip()
439
- if not text:
440
- return JSONResponse({"ok": False, "error": "empty proposal"})
441
- thread, msg = propose_version(tid, msg_id, text, username)
442
- if thread is None:
 
 
 
 
443
  return JSONResponse({"ok": False, "error": msg})
444
- return JSONResponse({"ok": True, "thread": thread})
 
445
 
446
  if action == "vote":
447
- if not username or username == "Guest":
448
- return JSONResponse({"ok": False, "error": "not signed in"})
449
- tid = str(payload.get("thread_id", ""))
450
- msg_id = str(payload.get("msg_id", ""))
451
- version_id = str(payload.get("version_id", ""))
452
- thread, msg = vote_version(tid, msg_id, version_id, username)
453
- if thread is None:
 
 
 
 
 
 
454
  return JSONResponse({"ok": False, "error": msg})
 
455
  if msg == "already_voted":
456
  return JSONResponse({"ok": False, "error": "already voted"})
457
- return JSONResponse({"ok": True, "thread": thread})
458
-
459
- return JSONResponse({"ok": False, "error": f"unknown action: {action}"})
460
 
 
461
 
462
- # ---------------------------------------------------------------------
463
- # Optional healthcheck
464
- # ---------------------------------------------------------------------
465
- @app.get("/health")
466
- def health():
467
- return {"ok": True}
468
 
469
 
470
  if __name__ == "__main__":
471
  import uvicorn
472
- uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)
 
 
10
  import numpy as np
11
  from fastapi import FastAPI, Request
12
  from fastapi.responses import HTMLResponse, JSONResponse
 
13
  from fastapi.templating import Jinja2Templates
14
 
 
 
15
  try:
16
  from sentence_transformers import SentenceTransformer
17
  except Exception: # pragma: no cover
18
  SentenceTransformer = None # type: ignore
19
 
 
 
 
 
 
20
 
21
  APP_TITLE = "Human Intelligence"
22
  DATA_DIR = Path(os.environ.get("DATA_DIR", "/data"))
23
+ CONVERSATIONS_FILE = DATA_DIR / "conversations.json"
 
24
  EMBED_FILE = DATA_DIR / "embeddings.json"
25
  TEMPLATES_DIR = Path("/app/templates")
26
+
27
+ SIMILARITY_THRESHOLD = float(os.environ.get("SIMILARITY_THRESHOLD", "0.78"))
28
+ EMBED_MODEL_NAME = os.environ.get(
29
+ "EMBED_MODEL_NAME",
30
+ "sentence-transformers/paraphrase-MiniLM-L3-v2",
31
+ )
32
 
33
  DATA_DIR.mkdir(parents=True, exist_ok=True)
 
34
  TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
35
 
36
  app = FastAPI(title=APP_TITLE)
37
  templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
38
 
 
 
 
39
  _embed_model = None
40
 
41
 
 
 
 
def now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string with second precision."""
    return datetime.now(timezone.utc).isoformat(timespec="seconds")
44
 
45
 
46
  def read_json(path: Path, default: Any):
 
54
 
def write_json(path: Path, data: Any) -> None:
    """Serialize *data* as indented UTF-8 JSON and store it at *path*.

    The text is written to a sibling ``<name>.tmp`` file first and then moved
    over *path* with ``Path.replace``, so *path* is only ever swapped as a
    whole. Values the JSON encoder cannot handle are converted with ``str``
    (``default=str``).

    Raises:
        OSError: if the temporary file cannot be written or moved.
    """
    tmp = path.with_suffix(path.suffix + ".tmp")
    # Serialize before touching the filesystem so an encoding failure cannot
    # leave a half-written temp file behind.
    payload = json.dumps(data, ensure_ascii=False, indent=2, default=str)
    try:
        tmp.write_text(payload, encoding="utf-8")
    except OSError:
        # Do not leave a truncated temp file next to the real one.
        tmp.unlink(missing_ok=True)
        raise
    tmp.replace(path)
62
 
63
 
64
def get_client_id(request: Request, payload: dict | None = None) -> str:
    """Resolve the caller's client identifier.

    Lookup order:
      1. the ``x-client-id`` request header,
      2. the ``client_id`` key of *payload* (when a payload is given),
      3. the literal ``"anon"``.

    Values are stripped of surrounding whitespace; an empty value falls
    through to the next source.
    """
    header_value = request.headers.get("x-client-id", "").strip()
    if header_value:
        return header_value

    if payload:
        payload_value = str(payload.get("client_id", "")).strip()
        if payload_value:
            return payload_value

    return "anon"
75
+
76
 
77
def anon_label(client_id: str) -> str:
    """Return the display name used for a client.

    Every client is shown as ``"Anonymous"`` regardless of *client_id*; the
    parameter is kept so callers do not need to change.
    """
    return "Anonymous"
81
 
82
 
 
 
 
def load_embed_model():
    """Return the shared embedding model, constructing it on first use.

    The instance is cached in the module-level ``_embed_model``. Returns
    ``None`` when ``SentenceTransformer`` is ``None``.
    """
    global _embed_model
    if _embed_model is None:
        if SentenceTransformer is None:
            return None
        _embed_model = SentenceTransformer(EMBED_MODEL_NAME)
    return _embed_model
90
 
91
 
 
93
  model = load_embed_model()
94
  if model is None:
95
  return []
 
96
  vec = model.encode(text, normalize_embeddings=True)
97
+ if hasattr(vec, "tolist"):
98
+ return vec.tolist()
99
+ return list(vec)
100
 
101
 
102
def load_embed_index() -> dict[str, dict[str, Any]]:
    """Read the embedding index from ``EMBED_FILE``.

    Returns an empty dict when the stored value is not a JSON object.
    """
    data = read_json(EMBED_FILE, {})
    return data if isinstance(data, dict) else {}
105
 
106
 
107
def save_embed_index(idx: dict[str, dict[str, Any]]) -> None:
    """Write the embedding index *idx* to ``EMBED_FILE``."""
    write_json(EMBED_FILE, idx)
109
 
110
 
111
def load_conversations() -> list[dict[str, Any]]:
    """Read the list of stored conversations from ``CONVERSATIONS_FILE``.

    Accepts either a bare JSON list or an object with a ``"conversations"``
    key wrapping the list. Anything else yields an empty list.
    """
    data = read_json(CONVERSATIONS_FILE, [])
    if isinstance(data, dict) and "conversations" in data:
        data = data["conversations"]
    return data if isinstance(data, list) else []
116
+
117
+
118
def save_conversations(conversations: list[dict[str, Any]]) -> None:
    """Write the full conversation list to ``CONVERSATIONS_FILE``."""
    write_json(CONVERSATIONS_FILE, conversations)
120
+
121
+
122
def normalize_version(version: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *version* with every expected key present.

    Missing keys are filled with defaults (fresh hex id, empty text,
    ``"Anonymous"`` author, current timestamp, zero votes, empty
    ``votes_by_client`` mapping). ``votes_by_client`` is reset to ``{}`` when
    it is not a dict, and ``votes`` is coerced to ``int``; a value that cannot
    be coerced becomes ``0`` instead of raising. The input is not mutated.
    """
    v = dict(version or {})

    # Only generate an id / timestamp when one is actually missing.
    if "id" not in v:
        v["id"] = uuid.uuid4().hex
    v.setdefault("text", "")
    v.setdefault("author", "Anonymous")
    if "created_at" not in v:
        v["created_at"] = now_iso()

    if not isinstance(v.get("votes_by_client"), dict):
        v["votes_by_client"] = {}

    try:
        v["votes"] = int(v.get("votes", 0))
    except (TypeError, ValueError, OverflowError):
        # Malformed stored data (None, non-numeric text, inf/nan) counts as zero.
        v["votes"] = 0
    return v
134
+
135
+
136
def normalize_answer(answer: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *answer* with defaults filled and versions normalized.

    Non-dict entries in ``versions`` are dropped, and a ``versions`` value
    that is not a list is treated as empty. When ``active_version`` does not
    name one of the remaining versions it is reset to the version with the
    most votes, ties broken by the greatest ``created_at`` string.
    """
    a = dict(answer or {})
    a.setdefault("id", uuid.uuid4().hex)
    a.setdefault("active_version", "")
    a.setdefault("created_at", now_iso())
    a.setdefault("updated_at", a["created_at"])

    raw_versions = a.get("versions")
    if not isinstance(raw_versions, list):
        # e.g. "versions": null in stored data
        raw_versions = []
    versions = [normalize_version(v) for v in raw_versions if isinstance(v, dict)]
    a["versions"] = versions

    if versions:
        version_ids = {v["id"] for v in versions}
        if a["active_version"] not in version_ids:
            a["active_version"] = max(
                versions,
                key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
            )["id"]

    return a
160
+
161
+
162
def normalize_conversation(conversation: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *conversation* with defaults filled in.

    Turns and answers that are not dicts are dropped; a ``turns`` or
    ``answers`` value that is not a list is treated as empty. Each turn gets
    default ``id``, ``role``, ``text``, ``author`` and ``ts`` keys, and each
    answer goes through ``normalize_answer``. If no turns remain but the
    conversation has a question, a single user turn is synthesized from it.
    """
    c = dict(conversation or {})
    c.setdefault("id", uuid.uuid4().hex)
    c.setdefault("question", "")
    c.setdefault("author", "Anonymous")
    c.setdefault("created_at", now_iso())
    c.setdefault("updated_at", c["created_at"])

    raw_turns = c.get("turns")
    if not isinstance(raw_turns, list):
        raw_turns = []
    turns: list[dict[str, Any]] = []
    for turn in raw_turns:
        if not isinstance(turn, dict):
            continue
        t = dict(turn)
        t.setdefault("id", uuid.uuid4().hex)
        t.setdefault("role", "user")
        t.setdefault("text", "")
        t.setdefault("author", "Anonymous")
        t.setdefault("ts", now_iso())
        turns.append(t)
    c["turns"] = turns

    raw_answers = c.get("answers")
    if not isinstance(raw_answers, list):
        raw_answers = []
    c["answers"] = [normalize_answer(a) for a in raw_answers if isinstance(a, dict)]

    if not c["turns"] and c["question"]:
        # Seed the transcript with the question itself as the first user turn.
        c["turns"].append(
            {
                "id": uuid.uuid4().hex,
                "role": "user",
                "text": c["question"],
                "author": c.get("author", "Anonymous"),
                "ts": c["created_at"],
            }
        )

    return c
 
203
 
 
 
 
204
 
205
def load_conversation(conversation_id: str) -> Optional[dict[str, Any]]:
    """Return the normalized conversation whose id equals *conversation_id*.

    Returns ``None`` for an empty id or when no stored conversation matches.
    Stored entries that are not dicts are ignored.
    """
    if not conversation_id:
        return None
    for conv in load_conversations():
        if isinstance(conv, dict) and str(conv.get("id")) == conversation_id:
            return normalize_conversation(conv)
    return None
212
 
213
 
214
def save_conversation(conversation: dict[str, Any]) -> dict[str, Any]:
    """Insert or replace *conversation* in the stored list and persist it.

    The conversation is normalized and its ``updated_at`` is set to the
    current time. An existing entry with the same id is replaced in place;
    otherwise the conversation is inserted at the front of the list. Stored
    entries that are not dicts are discarded on rewrite.

    Returns:
        The normalized conversation that was written.
    """
    conversation = normalize_conversation(conversation)
    conversation["updated_at"] = now_iso()

    conversations = [
        normalize_conversation(c)
        for c in load_conversations()
        if isinstance(c, dict)
    ]
    target_id = str(conversation["id"])
    for i, existing in enumerate(conversations):
        if str(existing.get("id")) == target_id:
            conversations[i] = conversation
            break
    else:
        # No entry with this id yet: newest conversations go first.
        conversations.insert(0, conversation)

    save_conversations(conversations)
    return conversation
230
 
 
 
231
 
232
def ensure_embedding(conversation: dict[str, Any]) -> None:
    """Store an embedding of the conversation's question in the embed index.

    Does nothing when the question is blank or when ``embed_text`` returns an
    empty vector. Otherwise the index entry keyed by the conversation id is
    written (overwriting any previous entry) and the index is saved.
    """
    question = str(conversation.get("question", "")).strip()
    if not question:
        return

    vec = embed_text(question)
    if not vec:
        return

    idx = load_embed_index()
    idx[str(conversation["id"])] = {
        "question": question,
        "vector": vec,
    }
    save_embed_index(idx)
247
 
 
 
 
 
 
 
 
248
 
249
def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
    """Find the stored conversation whose question is most similar to *question*.

    Similarity is the dot product between the query embedding and each stored
    vector. Index entries that are malformed or whose vector length differs
    from the query embedding are skipped individually, so one bad entry does
    not disable matching for the rest.

    Returns:
        ``{"conversation": <normalized conversation>, "score": <float>}`` for
        the best match when its score is at least ``SIMILARITY_THRESHOLD`` and
        the conversation can still be loaded; otherwise ``None``.
    """
    idx = load_embed_index()
    if not idx or SentenceTransformer is None:
        return None

    q_vec = np.array(embed_text(question), dtype=float)
    if q_vec.size == 0:
        return None

    ids: list[str] = []
    rows: list[list[Any]] = []
    for cid, entry in idx.items():
        vector = entry.get("vector") if isinstance(entry, dict) else None
        if isinstance(vector, list) and len(vector) == q_vec.shape[0]:
            ids.append(cid)
            rows.append(vector)
    if not rows:
        return None

    try:
        vecs = np.array(rows, dtype=float)
    except (TypeError, ValueError):
        # Non-numeric content inside a stored vector.
        return None

    if vecs.size == 0 or vecs.ndim != 2:
        return None
    if vecs.shape[1] != q_vec.shape[0]:
        return None

    sims = vecs @ q_vec
    best_i = int(np.argmax(sims))
    score = float(sims[best_i])

    if score < SIMILARITY_THRESHOLD:
        return None

    conv = load_conversation(ids[best_i])
    if conv is None:
        return None

    return {
        "conversation": conv,
        "score": score,
    }
284
 
 
 
 
 
 
 
 
285
 
286
def create_conversation(question: str, author: str = "Anonymous") -> dict[str, Any]:
    """Create, persist and index a new conversation for *question*.

    The conversation starts with a single user turn holding the stripped
    question and an empty answer list. It is saved via ``save_conversation``
    and then passed to ``ensure_embedding``.

    Returns:
        The conversation as returned by ``save_conversation``.
    """
    question = question.strip()
    now = now_iso()
    conversation = {
        "id": uuid.uuid4().hex,
        "question": question,
        "author": author,
        "created_at": now,
        "updated_at": now,
        "turns": [
            {
                "id": uuid.uuid4().hex,
                "role": "user",
                "text": question,
                "author": author,
                "ts": now,
            }
        ],
        "answers": [],
    }
    conversation = save_conversation(conversation)
    ensure_embedding(conversation)
    return conversation
309
 
310
+
311
def active_version(answer: dict[str, Any]) -> Optional[dict[str, Any]]:
    """Return the version of *answer* that should currently be shown.

    That is the version whose id equals ``answer["active_version"]``. When no
    version has that id, the version with the most votes is returned, ties
    broken by the greatest ``created_at`` string. Returns ``None`` when the
    answer has no versions.
    """
    versions = answer.get("versions", [])
    if not versions:
        return None

    active_id = answer.get("active_version")
    for version in versions:
        if version.get("id") == active_id:
            return version

    return max(
        versions,
        key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
    )
 
325
 
 
 
 
 
 
326
 
327
def answer_score(answer: dict[str, Any]) -> tuple[int, str]:
    """Return the sort key ``(votes, created_at)`` for *answer*.

    ``votes`` is the vote count of the answer's active version, or ``0`` when
    it has none; ``created_at`` is taken from the answer itself.
    """
    created_at = str(answer.get("created_at", ""))
    av = active_version(answer)
    if av is None:
        return 0, created_at
    return int(av.get("votes", 0)), created_at
332
 
333
 
334
def best_answer_payload(conversation: dict[str, Any]) -> Optional[dict[str, Any]]:
    """Summarize the highest-scoring answer of *conversation*.

    Answers are ranked with ``answer_score``. Returns ``None`` when the
    conversation has no answers or the best answer has no versions; otherwise
    a dict with the answer id and the id, text, votes, author and creation
    time of its active version.
    """
    answers = conversation.get("answers", [])
    if not answers:
        return None

    best = max(answers, key=answer_score)
    av = active_version(best)
    if av is None:
        return None

    return {
        "answer_id": best["id"],
        "version_id": av["id"],
        "text": av["text"],
        "votes": int(av.get("votes", 0)),
        "author": av.get("author", "Anonymous"),
        "created_at": av.get("created_at", ""),
    }
 
 
352
 
 
 
 
 
 
 
353
 
354
def add_answer(
    conversation_id: str,
    text: str,
    author: str = "Anonymous",
    question_if_new: str | None = None,
) -> tuple[Optional[dict[str, Any]], str]:
    """Append a new answer (with one initial version) to a conversation.

    When *conversation_id* does not match a stored conversation and
    *question_if_new* is given, a new conversation is created for that
    question first. The answer text is also appended to the conversation's
    turns as an ``"assistant"`` turn.

    Returns:
        ``(conversation, "ok")`` on success, or ``(None, reason)`` where
        reason is ``"empty answer"`` or ``"conversation not found"``.
    """
    text = text.strip()
    if not text:
        return None, "empty answer"

    conversation = load_conversation(conversation_id)
    if conversation is None:
        if not question_if_new:
            return None, "conversation not found"
        conversation = create_conversation(question_if_new, author)

    now = now_iso()
    version = normalize_version(
        {
            "text": text,
            "author": author,
            "created_at": now,
            "votes": 0,
            "votes_by_client": {},
        }
    )
    answer = normalize_answer(
        {
            "id": uuid.uuid4().hex,
            "versions": [version],
            "active_version": version["id"],
            "created_at": now,
            "updated_at": now,
        }
    )

    conversation["answers"].append(answer)
    conversation["turns"].append(
        {
            "id": uuid.uuid4().hex,
            "role": "assistant",
            "text": text,
            "author": author,
            "answer_id": answer["id"],
            "version_id": version["id"],
            "ts": now,
        }
    )

    save_conversation(conversation)
    return conversation, "ok"
 
 
405
 
 
 
 
406
 
407
def propose_version(
    conversation_id: str,
    answer_id: str,
    text: str,
    author: str = "Anonymous",
) -> tuple[Optional[dict[str, Any]], str]:
    """Add an alternative version of an existing answer.

    The new version starts with zero votes; the answer's ``active_version``
    is left unchanged here.

    Returns:
        ``(conversation, "ok")`` on success, or ``(None, reason)`` where
        reason is ``"empty proposal"``, ``"conversation not found"`` or
        ``"answer not found"``.
    """
    text = text.strip()
    if not text:
        return None, "empty proposal"

    conversation = load_conversation(conversation_id)
    if conversation is None:
        return None, "conversation not found"

    for answer in conversation["answers"]:
        if str(answer.get("id")) != answer_id:
            continue

        now = now_iso()
        version = normalize_version(
            {
                "text": text,
                "author": author,
                "created_at": now,
                "votes": 0,
                "votes_by_client": {},
            }
        )
        answer["versions"].append(version)
        answer["updated_at"] = now
        save_conversation(conversation)
        return conversation, "ok"

    return None, "answer not found"
441
+
442
+
443
def vote_version(
    conversation_id: str,
    answer_id: str,
    version_id: str,
    client_id: str,
    delta: int,
) -> tuple[Optional[dict[str, Any]], str]:
    """Record *client_id*'s vote on one version of an answer.

    *delta* is clamped to ``+1`` (any value >= 0, including 0) or ``-1``.
    Each client holds at most one vote per version: a vote equal to the
    client's current one is rejected as ``"already_voted"``, an opposite vote
    replaces it. The version's ``votes`` is recomputed as the sum of all
    client votes, and the answer's ``active_version`` becomes the version
    with the most votes (ties broken by the greatest ``created_at`` string).

    Returns:
        ``(conversation, "ok")`` after a recorded vote,
        ``(conversation, "already_voted")`` for a repeated vote, or
        ``(None, reason)`` where reason is ``"conversation not found"`` or
        ``"version not found"`` (also used when the answer id is unknown).
    """
    conversation = load_conversation(conversation_id)
    if conversation is None:
        return None, "conversation not found"

    delta = 1 if int(delta) >= 0 else -1

    for answer in conversation["answers"]:
        if str(answer.get("id")) != answer_id:
            continue

        for version in answer.get("versions", []):
            if str(version.get("id")) != version_id:
                continue

            votes_by_client = version.setdefault("votes_by_client", {})
            if not isinstance(votes_by_client, dict):
                votes_by_client = {}
                version["votes_by_client"] = votes_by_client

            current = int(votes_by_client.get(client_id, 0))
            if current == delta:
                return conversation, "already_voted"

            votes_by_client[client_id] = delta
            version["votes"] = int(sum(int(v) for v in votes_by_client.values()))

            if answer.get("versions"):
                answer["active_version"] = max(
                    answer["versions"],
                    key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
                )["id"]

            conversation["updated_at"] = now_iso()
            save_conversation(conversation)
            return conversation, "ok"

    return None, "version not found"
 
487
 
488
 
 
 
 
@app.get("/", response_class=HTMLResponse)
def home(request: Request):
    """Render ``index.html`` with the initial client state.

    The template context carries the request, the app title and
    ``init_json``: a JSON string holding ``ok``, the resolved ``client_id``
    and a ``conversation`` of ``None``.
    """
    init = {
        "ok": True,
        "client_id": get_client_id(request),
        "conversation": None,
    }
    return templates.TemplateResponse(
        "index.html",
        {
            "request": request,
            "app_title": APP_TITLE,
            "init_json": json.dumps(init, ensure_ascii=False),
        },
    )
504
 
505
 
506
@app.get("/health")
def health():
    """Health-check endpoint; always responds with ``{"ok": True}``."""
    return {"ok": True}
 
 
 
 
 
 
509
 
510
 
511
  @app.post("/api")
 
516
  return JSONResponse({"ok": False, "error": "bad payload"})
517
 
518
  action = str(payload.get("action", ""))
519
+ client_id = get_client_id(request, payload)
520
+ author = anon_label(client_id)
521
 
522
  if action == "init":
523
+ conversation_id = str(payload.get("conversation_id", "")).strip()
524
+ conversation = load_conversation(conversation_id) if conversation_id else None
525
+ return JSONResponse(
526
+ {
527
+ "ok": True,
528
+ "client_id": client_id,
529
+ "conversation": conversation,
530
+ }
531
+ )
532
 
533
+ if action == "get_conversation":
534
+ conversation_id = str(payload.get("conversation_id", "")).strip()
535
+ conversation = load_conversation(conversation_id)
536
+ if conversation is None:
537
  return JSONResponse({"ok": False, "error": "not found"})
538
+ return JSONResponse({"ok": True, "conversation": conversation})
539
 
540
+ if action == "ask":
 
 
541
  question = str(payload.get("question", "")).strip()
542
  if not question:
543
  return JSONResponse({"ok": False, "error": "empty question"})
 
 
 
 
 
544
 
545
+ match = find_similar_conversation(question)
546
+ if match and match.get("conversation"):
547
+ conversation = match["conversation"]
548
+ best = best_answer_payload(conversation)
549
+ assistant_text = (
550
+ best["text"]
551
+ if best is not None
552
+ else "No answer yet. You can write one."
553
+ )
554
+ return JSONResponse(
555
+ {
556
+ "ok": True,
557
+ "matched": True,
558
+ "similarity": match["score"],
559
+ "conversation": conversation,
560
+ "assistant_text": assistant_text,
561
+ "best_answer": best,
562
+ }
563
+ )
564
+
565
+ conversation = create_conversation(question, author)
566
+ return JSONResponse(
567
+ {
568
+ "ok": True,
569
+ "matched": False,
570
+ "conversation": conversation,
571
+ "assistant_text": "No answer yet. You can write one.",
572
+ "best_answer": None,
573
+ }
574
+ )
575
+
576
+ if action == "answer":
577
+ conversation_id = str(payload.get("conversation_id", "")).strip()
578
  text = str(payload.get("text", "")).strip()
579
+ question = str(payload.get("question", "")).strip() or None
580
+
581
+ conversation, msg = add_answer(
582
+ conversation_id=conversation_id,
583
+ text=text,
584
+ author=author,
585
+ question_if_new=question,
586
+ )
587
+ if conversation is None:
588
  return JSONResponse({"ok": False, "error": msg})
589
+
590
+ return JSONResponse({"ok": True, "conversation": conversation})
591
 
592
  if action == "propose":
593
+ conversation_id = str(payload.get("conversation_id", "")).strip()
594
+ answer_id = str(payload.get("answer_id", "")).strip()
 
 
595
  text = str(payload.get("text", "")).strip()
596
+
597
+ conversation, msg = propose_version(
598
+ conversation_id=conversation_id,
599
+ answer_id=answer_id,
600
+ text=text,
601
+ author=author,
602
+ )
603
+ if conversation is None:
604
  return JSONResponse({"ok": False, "error": msg})
605
+
606
+ return JSONResponse({"ok": True, "conversation": conversation})
607
 
608
  if action == "vote":
609
+ conversation_id = str(payload.get("conversation_id", "")).strip()
610
+ answer_id = str(payload.get("answer_id", "")).strip()
611
+ version_id = str(payload.get("version_id", "")).strip()
612
+ delta = int(payload.get("delta", 1))
613
+
614
+ conversation, msg = vote_version(
615
+ conversation_id=conversation_id,
616
+ answer_id=answer_id,
617
+ version_id=version_id,
618
+ client_id=client_id,
619
+ delta=delta,
620
+ )
621
+ if conversation is None:
622
  return JSONResponse({"ok": False, "error": msg})
623
+
624
  if msg == "already_voted":
625
  return JSONResponse({"ok": False, "error": "already voted"})
 
 
 
626
 
627
+ return JSONResponse({"ok": True, "conversation": conversation})
628
 
629
+ return JSONResponse({"ok": False, "error": f"unknown action: {action}"})
 
 
 
 
 
630
 
631
 
if __name__ == "__main__":
    # Run the app directly with uvicorn when executed as a script:
    # all interfaces, port 7860, auto-reload off.
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)