Update app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,8 @@ import httpx
|
|
| 38 |
|
| 39 |
# ===== CONFIG =====
|
| 40 |
KNOWLEDGE_DIR = "knowledge"
|
| 41 |
-
|
|
|
|
| 42 |
DB_PATH = "/data/analytics.db" if os.path.exists("/data") else "analytics.db"
|
| 43 |
CHUNK_SIZE = 800
|
| 44 |
CHUNK_OVERLAP = 100
|
|
@@ -113,8 +114,11 @@ def set_config(key, value):
|
|
| 113 |
|
| 114 |
|
| 115 |
# ===== ADMIN AUTH =====
|
| 116 |
-
#
|
| 117 |
-
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "
|
|
|
|
|
|
|
|
|
|
| 118 |
admin_sessions = {} # token -> expiry timestamp
|
| 119 |
|
| 120 |
def create_session():
|
|
@@ -309,15 +313,19 @@ async def tool_search_pubmed(query, limit=5):
|
|
| 309 |
return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
|
| 310 |
|
| 311 |
|
| 312 |
-
# ===== TOOL: SEARCH CONSENSUS (
|
| 313 |
async def tool_search_consensus(query, limit=5):
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
|
| 323 |
# ===== TOOL: SEARCH SEMANTIC SCHOLAR (direct API — free, no auth) =====
|
|
@@ -371,8 +379,7 @@ async def tool_search_scholar(query, limit=5):
|
|
| 371 |
except Exception as e:
|
| 372 |
return {"error": f"Semantic Scholar: {str(e)}", "results": [], "source": "Semantic Scholar"}
|
| 373 |
|
| 374 |
-
|
| 375 |
-
async def tool_library_info(question, history=None):
|
| 376 |
if not vectorstore:
|
| 377 |
return {"answer": "Knowledge base not initialized.", "sources": []}
|
| 378 |
|
|
@@ -397,7 +404,14 @@ Question: {question}
|
|
| 397 |
|
| 398 |
Answer:"""
|
| 399 |
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
response = llm.invoke(prompt)
|
| 402 |
|
| 403 |
sources = []
|
|
@@ -515,14 +529,13 @@ async def search(req: SearchRequest):
|
|
| 515 |
result = {"error": f"Unknown source: {source}", "results": []}
|
| 516 |
|
| 517 |
elapsed = time.time() - start
|
| 518 |
-
|
| 519 |
result["response_time"] = round(elapsed, 2)
|
| 520 |
result["is_medical"] = is_medical_topic(req.query)
|
| 521 |
return result
|
| 522 |
|
| 523 |
except Exception as e:
|
| 524 |
elapsed = time.time() - start
|
| 525 |
-
log_query(req.query, source, req.model, elapsed, 0, str(e))
|
| 526 |
return {"error": str(e), "results": [], "response_time": round(elapsed, 2)}
|
| 527 |
|
| 528 |
|
|
@@ -547,9 +560,9 @@ async def rag_query(req: RAGRequest):
|
|
| 547 |
start = time.time()
|
| 548 |
try:
|
| 549 |
history = [{"role": m.role, "content": m.content} for m in req.history] if req.history else None
|
| 550 |
-
result = await tool_library_info(req.question, history)
|
| 551 |
elapsed = time.time() - start
|
| 552 |
-
|
| 553 |
return {
|
| 554 |
"answer": result["answer"],
|
| 555 |
"sources": result["sources"],
|
|
@@ -558,7 +571,6 @@ async def rag_query(req: RAGRequest):
|
|
| 558 |
}
|
| 559 |
except Exception as e:
|
| 560 |
elapsed = time.time() - start
|
| 561 |
-
log_query(req.question, "rag", req.model, elapsed, 0, str(e))
|
| 562 |
return {"answer": "Error processing your question.", "sources": [], "error": str(e)}
|
| 563 |
|
| 564 |
|
|
|
|
| 38 |
|
| 39 |
# ===== CONFIG =====
# Knowledge-base inputs live in the repo; index and analytics DB prefer the
# persistent /data volume (HF Spaces) and fall back to the working directory.
KNOWLEDGE_DIR = "knowledge"

_HAS_DATA_VOLUME = os.path.exists("/data")  # True on HF Spaces with persistent storage
FAISS_INDEX_PATH = "/data/faiss_index" if _HAS_DATA_VOLUME else "faiss_index"
DB_PATH = "/data/analytics.db" if _HAS_DATA_VOLUME else "analytics.db"

# Text-splitting parameters for the RAG chunker.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 100
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
# ===== ADMIN AUTH =====
# ADMIN_PASSWORD must be set as HF Space Secret — no insecure fallback.
# An empty value means the admin dashboard stays locked until the secret
# is configured; we surface that loudly at import time.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")

if not ADMIN_PASSWORD:
    import warnings

    warnings.warn("ADMIN_PASSWORD secret is not set. Admin dashboard will be inaccessible until configured.")

# Active admin sessions, mapping token -> expiry timestamp.
admin_sessions = {}
|
| 123 |
|
| 124 |
def create_session():
|
|
|
|
| 313 |
return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
|
| 314 |
|
| 315 |
|
| 316 |
+
# ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
async def tool_search_consensus(query, limit=5):
    """
    Consensus.app requires OAuth so we can't call it directly.
    Instead we search Semantic Scholar with the same query and return
    results alongside a direct Consensus deep-link so the user can
    also check the AI-synthesized answer there.

    Args:
        query: Free-text search string supplied by the user.
        limit: Maximum number of results to request (default 5).

    Returns:
        The dict produced by tool_search_scholar, augmented with a
        "source" label, a "consensus_url" deep-link, and a short
        explanatory "message".
    """
    # Function-scope import keeps the module's dependency block unchanged.
    from urllib.parse import quote_plus

    scholar = await tool_search_scholar(query, limit)
    scholar["source"] = "Consensus / Semantic Scholar"
    # BUG FIX: the query must be percent-encoded before interpolation —
    # spaces, '&', '#', '?' etc. in the raw string would otherwise produce
    # a malformed or truncated Consensus deep-link.
    scholar["consensus_url"] = f"https://consensus.app/results/?q={quote_plus(query)}"
    scholar["message"] = "Results from Semantic Scholar. For AI-synthesized consensus, click the Consensus link."
    return scholar
|
| 329 |
|
| 330 |
|
| 331 |
# ===== TOOL: SEARCH SEMANTIC SCHOLAR (direct API — free, no auth) =====
|
|
|
|
| 379 |
except Exception as e:
|
| 380 |
return {"error": f"Semantic Scholar: {str(e)}", "results": [], "source": "Semantic Scholar"}
|
| 381 |
|
| 382 |
+
async def tool_library_info(question, history=None, model="gpt"):
|
|
|
|
| 383 |
if not vectorstore:
|
| 384 |
return {"answer": "Knowledge base not initialized.", "sources": []}
|
| 385 |
|
|
|
|
| 404 |
|
| 405 |
Answer:"""
|
| 406 |
|
| 407 |
+
# Respect the model selection — use Claude if requested, GPT otherwise
|
| 408 |
+
use_claude = model == "claude" and os.environ.get("ANTHROPIC_API_KEY")
|
| 409 |
+
if use_claude:
|
| 410 |
+
from langchain_anthropic import ChatAnthropic
|
| 411 |
+
llm = ChatAnthropic(model="claude-haiku-4-5-20251001", temperature=0.2, max_tokens=500)
|
| 412 |
+
else:
|
| 413 |
+
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, max_tokens=500)
|
| 414 |
+
|
| 415 |
response = llm.invoke(prompt)
|
| 416 |
|
| 417 |
sources = []
|
|
|
|
| 529 |
result = {"error": f"Unknown source: {source}", "results": []}
|
| 530 |
|
| 531 |
elapsed = time.time() - start
|
| 532 |
+
# Logging handled by Cloudflare D1 via worker /log endpoint (single source of truth)
|
| 533 |
result["response_time"] = round(elapsed, 2)
|
| 534 |
result["is_medical"] = is_medical_topic(req.query)
|
| 535 |
return result
|
| 536 |
|
| 537 |
except Exception as e:
|
| 538 |
elapsed = time.time() - start
|
|
|
|
| 539 |
return {"error": str(e), "results": [], "response_time": round(elapsed, 2)}
|
| 540 |
|
| 541 |
|
|
|
|
| 560 |
start = time.time()
|
| 561 |
try:
|
| 562 |
history = [{"role": m.role, "content": m.content} for m in req.history] if req.history else None
|
| 563 |
+
result = await tool_library_info(req.question, history, model=req.model)
|
| 564 |
elapsed = time.time() - start
|
| 565 |
+
# Logging handled by Cloudflare D1 via worker /log endpoint
|
| 566 |
return {
|
| 567 |
"answer": result["answer"],
|
| 568 |
"sources": result["sources"],
|
|
|
|
| 571 |
}
|
| 572 |
except Exception as e:
|
| 573 |
elapsed = time.time() - start
|
|
|
|
| 574 |
return {"answer": "Error processing your question.", "sources": [], "error": str(e)}
|
| 575 |
|
| 576 |
|