Spaces:

Human-Intelligence
/

Chat

Running

App Files Files Community

wop committed 19 days ago

Commit

976e888

verified ·

1 Parent(s): 7a1ea71

Update main.py

Browse files

Files changed (1) hide show

main.py +97 -115

main.py CHANGED Viewed

@@ -14,8 +14,8 @@ from fastapi.templating import Jinja2Templates
 try:
     from sentence_transformers import SentenceTransformer
-except Exception:  # pragma: no cover
-    SentenceTransformer = None  # type: ignore
 APP_TITLE = "Human Intelligence"
@@ -24,10 +24,10 @@ CONVERSATIONS_FILE = DATA_DIR / "conversations.json"
 EMBED_FILE = DATA_DIR / "embeddings.json"
 TEMPLATES_DIR = Path("/app/templates")
-SIMILARITY_THRESHOLD = float(os.environ.get("SIMILARITY_THRESHOLD", "0.78"))
 EMBED_MODEL_NAME = os.environ.get(
     "EMBED_MODEL_NAME",
-    "sentence-transformers/paraphrase-MiniLM-L3-v2",
 )
 DATA_DIR.mkdir(parents=True, exist_ok=True)
@@ -39,6 +39,8 @@ templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
 _embed_model = None
 def now_iso() -> str:
     """Return the current UTC time as an ISO 8601 string with whole-second precision."""
     return datetime.now(timezone.utc).isoformat(timespec="seconds")
@@ -65,21 +67,19 @@ def get_client_id(request: Request, payload: dict | None = None) -> str:
     header_value = request.headers.get("x-client-id", "").strip()
     if header_value:
         return header_value
     if payload:
         payload_value = str(payload.get("client_id", "")).strip()
         if payload_value:
             return payload_value
     return "anon"
 def anon_label(client_id: str) -> str:
-    if not client_id or client_id == "anon":
-        return "Anonymous"
     return "Anonymous"
 def load_embed_model():
     global _embed_model
     if _embed_model is None:
@@ -108,6 +108,8 @@ def save_embed_index(idx: dict[str, dict[str, Any]]) -> None:
     write_json(EMBED_FILE, idx)
 def load_conversations() -> list[dict[str, Any]]:
     data = read_json(CONVERSATIONS_FILE, [])
     if isinstance(data, dict) and "conversations" in data:
@@ -155,7 +157,6 @@ def normalize_answer(answer: dict[str, Any]) -> dict[str, Any]:
                 versions,
                 key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
             )["id"]
     return a
@@ -189,16 +190,13 @@ def normalize_conversation(conversation: dict[str, Any]) -> dict[str, Any]:
     ]
     if not c["turns"] and c["question"]:
-        c["turns"].append(
-            {
-                "id": uuid.uuid4().hex,
-                "role": "user",
-                "text": c["question"],
-                "author": c.get("author", "Anonymous"),
-                "ts": c["created_at"],
-            }
-        )
     return c
@@ -233,11 +231,9 @@ def ensure_embedding(conversation: dict[str, Any]) -> None:
     question = str(conversation.get("question", "")).strip()
     if not question:
         return
     vec = embed_text(question)
     if not vec:
         return
     idx = load_embed_index()
     idx[str(conversation["id"])] = {
         "question": question,
@@ -246,7 +242,12 @@ def ensure_embedding(conversation: dict[str, Any]) -> None:
     save_embed_index(idx)
-def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
     idx = load_embed_index()
     if not idx or SentenceTransformer is None:
         return None
@@ -255,7 +256,10 @@ def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
     if q_vec.size == 0:
         return None
-    ids = list(idx.keys())
     try:
         vecs = np.array([idx[cid]["vector"] for cid in ids], dtype=float)
     except Exception:
@@ -277,11 +281,10 @@ def find_similar_conversation(question: str) -> Optional[dict[str, Any]]:
     if conv is None:
         return None
-    return {
-        "conversation": conv,
-        "score": score,
-    }
 def create_conversation(question: str, author: str = "Anonymous") -> dict[str, Any]:
     question = question.strip()
@@ -292,15 +295,13 @@ def create_conversation(question: str, author: str = "Anonymous") -> dict[str, A
         "author": author,
         "created_at": now,
         "updated_at": now,
-        "turns": [
-            {
-                "id": uuid.uuid4().hex,
-                "role": "user",
-                "text": question,
-                "author": author,
-                "ts": now,
-            }
-        ],
         "answers": [],
     }
     conversation = save_conversation(conversation)
@@ -312,12 +313,10 @@ def active_version(answer: dict[str, Any]) -> Optional[dict[str, Any]]:
     versions = answer.get("versions", [])
     if not versions:
         return None
     active_id = answer.get("active_version")
     for version in versions:
         if version.get("id") == active_id:
             return version
     return max(
         versions,
         key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
@@ -335,12 +334,10 @@ def best_answer_payload(conversation: dict[str, Any]) -> Optional[dict[str, Any]
     answers = conversation.get("answers", [])
     if not answers:
         return None
     best = max(answers, key=answer_score)
     av = active_version(best)
     if av is None:
         return None
     return {
         "answer_id": best["id"],
         "version_id": av["id"],
@@ -368,37 +365,31 @@ def add_answer(
         conversation = create_conversation(question_if_new, author)
     now = now_iso()
-    version = normalize_version(
-        {
-            "text": text,
-            "author": author,
-            "created_at": now,
-            "votes": 0,
-            "votes_by_client": {},
-        }
-    )
-    answer = normalize_answer(
-        {
-            "id": uuid.uuid4().hex,
-            "versions": [version],
-            "active_version": version["id"],
-            "created_at": now,
-            "updated_at": now,
-        }
-    )
     conversation["answers"].append(answer)
-    conversation["turns"].append(
-        {
-            "id": uuid.uuid4().hex,
-            "role": "assistant",
-            "text": text,
-            "author": author,
-            "answer_id": answer["id"],
-            "version_id": version["id"],
-            "ts": now,
-        }
-    )
     save_conversation(conversation)
     return conversation, "ok"
@@ -423,15 +414,13 @@ def propose_version(
             continue
         now = now_iso()
-        version = normalize_version(
-            {
-                "text": text,
-                "author": author,
-                "created_at": now,
-                "votes": 0,
-                "votes_by_client": {},
-            }
-        )
         answer["versions"].append(version)
         answer["updated_at"] = now
         save_conversation(conversation)
@@ -456,7 +445,6 @@ def vote_version(
     for answer in conversation["answers"]:
         if str(answer.get("id")) != answer_id:
             continue
         for version in answer.get("versions", []):
             if str(version.get("id")) != version_id:
                 continue
@@ -486,6 +474,8 @@ def vote_version(
     return None, "version not found"
 @app.get("/", response_class=HTMLResponse)
 def home(request: Request):
     init = {
@@ -519,17 +509,17 @@ async def api(request: Request):
     client_id = get_client_id(request, payload)
     author = anon_label(client_id)
     if action == "init":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         conversation = load_conversation(conversation_id) if conversation_id else None
-        return JSONResponse(
-            {
-                "ok": True,
-                "client_id": client_id,
-                "conversation": conversation,
-            }
-        )
     if action == "get_conversation":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         conversation = load_conversation(conversation_id)
@@ -537,42 +527,37 @@ async def api(request: Request):
             return JSONResponse({"ok": False, "error": "not found"})
         return JSONResponse({"ok": True, "conversation": conversation})
     if action == "ask":
         question = str(payload.get("question", "")).strip()
         if not question:
             return JSONResponse({"ok": False, "error": "empty question"})
         match = find_similar_conversation(question)
         if match and match.get("conversation"):
             conversation = match["conversation"]
             best = best_answer_payload(conversation)
-            assistant_text = (
-                best["text"]
-                if best is not None
-                else "No answer yet. You can write one."
-            )
-            return JSONResponse(
-                {
-                    "ok": True,
-                    "matched": True,
-                    "similarity": match["score"],
-                    "conversation": conversation,
-                    "assistant_text": assistant_text,
-                    "best_answer": best,
-                }
-            )
-        conversation = create_conversation(question, author)
-        return JSONResponse(
-            {
                 "ok": True,
-                "matched": False,
                 "conversation": conversation,
-                "assistant_text": "No answer yet. You can write one.",
-                "best_answer": None,
-            }
-        )
     if action == "answer":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         text = str(payload.get("text", "")).strip()
@@ -586,9 +571,9 @@ async def api(request: Request):
         )
         if conversation is None:
             return JSONResponse({"ok": False, "error": msg})
         return JSONResponse({"ok": True, "conversation": conversation})
     if action == "propose":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         answer_id = str(payload.get("answer_id", "")).strip()
@@ -602,9 +587,9 @@ async def api(request: Request):
         )
         if conversation is None:
             return JSONResponse({"ok": False, "error": msg})
         return JSONResponse({"ok": True, "conversation": conversation})
     if action == "vote":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         answer_id = str(payload.get("answer_id", "")).strip()
@@ -620,10 +605,8 @@ async def api(request: Request):
         )
         if conversation is None:
             return JSONResponse({"ok": False, "error": msg})
         if msg == "already_voted":
             return JSONResponse({"ok": False, "error": "already voted"})
         return JSONResponse({"ok": True, "conversation": conversation})
     return JSONResponse({"ok": False, "error": f"unknown action: {action}"})
@@ -631,5 +614,4 @@ async def api(request: Request):
 # Script entry point: this branch only runs when the file is executed directly.
 if __name__ == "__main__":
     import uvicorn
     # Serve "main:app" on all interfaces, port 7860, with auto-reload disabled.
     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)

 try:
     from sentence_transformers import SentenceTransformer
+except Exception:
+    SentenceTransformer = None
 APP_TITLE = "Human Intelligence"
 EMBED_FILE = DATA_DIR / "embeddings.json"
 TEMPLATES_DIR = Path("/app/templates")
+SIMILARITY_THRESHOLD = float(os.environ.get("SIMILARITY_THRESHOLD", "0.62"))
 EMBED_MODEL_NAME = os.environ.get(
     "EMBED_MODEL_NAME",
+    "sentence-transformers/paraphrase-MiniLM-L6-v2",
 )
 DATA_DIR.mkdir(parents=True, exist_ok=True)
 _embed_model = None
+# ────────────────────── Utilities ──────────────────────
 def now_iso() -> str:
     """Return the current UTC time as an ISO 8601 string with whole-second precision."""
     return datetime.now(timezone.utc).isoformat(timespec="seconds")
     header_value = request.headers.get("x-client-id", "").strip()
     if header_value:
         return header_value
     if payload:
         payload_value = str(payload.get("client_id", "")).strip()
         if payload_value:
             return payload_value
     return "anon"
 def anon_label(client_id: str) -> str:
     """Return the author label for a client; always "Anonymous" (``client_id`` is not used)."""
     return "Anonymous"
+# ────────────────────── Embeddings ──────────────────────
 def load_embed_model():
     global _embed_model
     if _embed_model is None:
     write_json(EMBED_FILE, idx)
+# ────────────────────── Conversations CRUD ──────────────────────
 def load_conversations() -> list[dict[str, Any]]:
     data = read_json(CONVERSATIONS_FILE, [])
     if isinstance(data, dict) and "conversations" in data:
                 versions,
                 key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
             )["id"]
     return a
     ]
     if not c["turns"] and c["question"]:
+        c["turns"].append({
+            "id": uuid.uuid4().hex,
+            "role": "user",
+            "text": c["question"],
+            "author": c.get("author", "Anonymous"),
+            "ts": c["created_at"],
+        })
     return c
     question = str(conversation.get("question", "")).strip()
     if not question:
         return
     vec = embed_text(question)
     if not vec:
         return
     idx = load_embed_index()
     idx[str(conversation["id"])] = {
         "question": question,
     save_embed_index(idx)
+# ────────────────────── Semantic search ──────────────────────
+def find_similar_conversation(
+    question: str,
+    exclude_id: str | None = None,
+) -> Optional[dict[str, Any]]:
     idx = load_embed_index()
     if not idx or SentenceTransformer is None:
         return None
     if q_vec.size == 0:
         return None
+    ids = [cid for cid in idx if cid != exclude_id]
+    if not ids:
+        return None
     try:
         vecs = np.array([idx[cid]["vector"] for cid in ids], dtype=float)
     except Exception:
     if conv is None:
         return None
+    return {"conversation": conv, "score": score}
+# ────────────────────── Actions ──────────────────────
 def create_conversation(question: str, author: str = "Anonymous") -> dict[str, Any]:
     question = question.strip()
         "author": author,
         "created_at": now,
         "updated_at": now,
+        "turns": [{
+            "id": uuid.uuid4().hex,
+            "role": "user",
+            "text": question,
+            "author": author,
+            "ts": now,
+        }],
         "answers": [],
     }
     conversation = save_conversation(conversation)
     versions = answer.get("versions", [])
     if not versions:
         return None
     active_id = answer.get("active_version")
     for version in versions:
         if version.get("id") == active_id:
             return version
     return max(
         versions,
         key=lambda v: (int(v.get("votes", 0)), str(v.get("created_at", ""))),
     answers = conversation.get("answers", [])
     if not answers:
         return None
     best = max(answers, key=answer_score)
     av = active_version(best)
     if av is None:
         return None
     return {
         "answer_id": best["id"],
         "version_id": av["id"],
         conversation = create_conversation(question_if_new, author)
     now = now_iso()
+    version = normalize_version({
+        "text": text,
+        "author": author,
+        "created_at": now,
+        "votes": 0,
+        "votes_by_client": {},
+    })
+    answer = normalize_answer({
+        "id": uuid.uuid4().hex,
+        "versions": [version],
+        "active_version": version["id"],
+        "created_at": now,
+        "updated_at": now,
+    })
     conversation["answers"].append(answer)
+    conversation["turns"].append({
+        "id": uuid.uuid4().hex,
+        "role": "assistant",
+        "text": text,
+        "author": author,
+        "answer_id": answer["id"],
+        "version_id": version["id"],
+        "ts": now,
+    })
     save_conversation(conversation)
     return conversation, "ok"
             continue
         now = now_iso()
+        version = normalize_version({
+            "text": text,
+            "author": author,
+            "created_at": now,
+            "votes": 0,
+            "votes_by_client": {},
+        })
         answer["versions"].append(version)
         answer["updated_at"] = now
         save_conversation(conversation)
     for answer in conversation["answers"]:
         if str(answer.get("id")) != answer_id:
             continue
         for version in answer.get("versions", []):
             if str(version.get("id")) != version_id:
                 continue
     return None, "version not found"
+# ────────────────────── Routes ──────────────────────
 @app.get("/", response_class=HTMLResponse)
 def home(request: Request):
     init = {
     client_id = get_client_id(request, payload)
     author = anon_label(client_id)
+    # ── init ──
     if action == "init":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         conversation = load_conversation(conversation_id) if conversation_id else None
+        return JSONResponse({
+            "ok": True,
+            "client_id": client_id,
+            "conversation": conversation,
+        })
+    # ── get_conversation ──
     if action == "get_conversation":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         conversation = load_conversation(conversation_id)
             return JSONResponse({"ok": False, "error": "not found"})
         return JSONResponse({"ok": True, "conversation": conversation})
+    # ── ask ──
     if action == "ask":
         question = str(payload.get("question", "")).strip()
         if not question:
             return JSONResponse({"ok": False, "error": "empty question"})
+        # 1) Search FIRST — before creating anything
         match = find_similar_conversation(question)
         if match and match.get("conversation"):
             conversation = match["conversation"]
             best = best_answer_payload(conversation)
+            return JSONResponse({
                 "ok": True,
+                "matched": True,
+                "similarity": match["score"],
                 "conversation": conversation,
+                "assistant_text": best["text"] if best else "No answer yet. You can write one.",
+                "best_answer": best,
+            })
+        # 2) No match — create new
+        conversation = create_conversation(question, author)
+        return JSONResponse({
+            "ok": True,
+            "matched": False,
+            "conversation": conversation,
+            "assistant_text": "No answer yet. You can write one.",
+            "best_answer": None,
+        })
+    # ── answer ──
     if action == "answer":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         text = str(payload.get("text", "")).strip()
         )
         if conversation is None:
             return JSONResponse({"ok": False, "error": msg})
         return JSONResponse({"ok": True, "conversation": conversation})
+    # ── propose ──
     if action == "propose":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         answer_id = str(payload.get("answer_id", "")).strip()
         )
         if conversation is None:
             return JSONResponse({"ok": False, "error": msg})
         return JSONResponse({"ok": True, "conversation": conversation})
+    # ── vote ──
     if action == "vote":
         conversation_id = str(payload.get("conversation_id", "")).strip()
         answer_id = str(payload.get("answer_id", "")).strip()
         )
         if conversation is None:
             return JSONResponse({"ok": False, "error": msg})
         if msg == "already_voted":
             return JSONResponse({"ok": False, "error": "already voted"})
         return JSONResponse({"ok": True, "conversation": conversation})
     return JSONResponse({"ok": False, "error": f"unknown action: {action}"})
 # Script entry point: this branch only runs when the file is executed directly.
 if __name__ == "__main__":
     import uvicorn
     # Serve "main:app" on all interfaces, port 7860, with auto-reload disabled.
     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)