Spaces:

TheQuantEd
/

CTA

Running

App Files Community

TheQuantEd committed 6 days ago

Commit

bfeb61b

1 Parent(s): 8bd7457

Fix backend crash: make Neo4jGraph lazy-init in graphrag.py — was failing at import time

Browse files

Files changed (1) hide show

backend/graphrag.py +43 -33

backend/graphrag.py CHANGED Viewed

@@ -10,24 +10,17 @@ from dotenv import load_dotenv
 load_dotenv()
-graph = Neo4jGraph(
-    url=os.getenv("NEO4J_URI"),
-    username=os.getenv("NEO4J_USERNAME"),
-    password=os.getenv("NEO4J_PASSWORD"),
-    database=os.getenv("NEO4J_DATABASE", "neo4j"),
-)
 def _strip_thinking(text: str) -> str:
-    """Remove <think>...</think> blocks that reasoning models emit before the actual answer."""
-    # Strip block tags (including variations like <thinking>)
     text = re.sub(r"<think(?:ing)?>.*?</think(?:ing)?>", "", text, flags=re.DOTALL | re.IGNORECASE)
     return text.strip()
 class _ThinkStrippedLLM(ChatOpenAI):
-    """ChatOpenAI wrapper that strips <think> reasoning tokens from every response."""
     def _create_chat_result(self, response, generation_info=None) -> ChatResult:
         result: ChatResult = super()._create_chat_result(response, generation_info)
         cleaned = []
@@ -38,12 +31,26 @@ class _ThinkStrippedLLM(ChatOpenAI):
         return ChatResult(generations=cleaned, llm_output=result.llm_output)
-llm = _ThinkStrippedLLM(
-    model=os.getenv("OPENAI_MODEL", "qwen/qwen3-32b"),
-    openai_api_key=os.getenv("OPENAI_API_KEY"),
-    openai_api_base=os.getenv("OPENAI_BASE_URL"),
-    temperature=0,
-)
 _CYPHER_GENERATION_TEMPLATE = """You are an expert Neo4j Cypher query writer for a clinical trial matching system.
@@ -66,7 +73,7 @@ Relationships:
 - (Trial)-[:LOCATED_AT]->(StudySite)
 Rules:
-- For biomarker lookups, use the `id` property with uppercase underscore format, e.g. `{{id: 'HER2_POS'}}` NOT `{{name: 'HER2', status: 'positive'}}`
 - For condition lookups on Trial nodes, use lowercase: `t.condition = 'breast cancer'`
 - Always use relationship pattern (Patient)-[:ELIGIBLE_FOR]->(Trial) to find eligible patients
 - Limit results to 25 unless asked for more
@@ -79,22 +86,26 @@ _CYPHER_PROMPT = PromptTemplate(
     template=_CYPHER_GENERATION_TEMPLATE,
 )
-graph_chain = GraphCypherQAChain.from_llm(
-    llm=llm,
-    graph=graph,
-    verbose=True,
-    allow_dangerous_requests=True,
-    cypher_prompt=_CYPHER_PROMPT,
-)
 def retrieve_patient_trial_matches(patient_id: str) -> list:
-    query = f"""
-    MATCH (p:Patient {{id: '{patient_id}'}})-[:HAS_DIAGNOSIS]->(d:Diagnosis)-[:ELIGIBLE_FOR]->(t:Trial)
-    RETURN p.id as patient, d.name as diagnosis, t.id as trial, t.phase as phase, t.condition as condition
-    """
     try:
-        return graph.query(query)
     except Exception as e:
         print(f"[graphrag] query error: {e}")
         return []
@@ -102,19 +113,18 @@ def retrieve_patient_trial_matches(patient_id: str) -> list:
 def rag_query(question: str) -> str:
     try:
-        result = graph_chain.run(question)
         return _strip_thinking(result) if result else "No results found."
     except Exception as e:
         err = str(e)
-        # Surface a clean message instead of the raw Neo4j stack trace
         if "<think>" in err or "SyntaxError" in err:
-            return "The query model returned unexpected output. Please rephrase your question (e.g. 'List patients eligible for breast cancer trials')."
         return f"Graph query error: {err}"
 def get_graph_stats() -> dict:
     try:
-        result = graph.query("""
             MATCH (p:Patient) WITH count(p) as patients
             MATCH (t:Trial) WITH patients, count(t) as trials
             MATCH (d:Diagnosis) WITH patients, trials, count(d) as diagnoses

 load_dotenv()
+# Lazily initialised — Neo4j may not be ready at import time
+_graph = None
+_graph_chain = None
 def _strip_thinking(text: str) -> str:
     text = re.sub(r"<think(?:ing)?>.*?</think(?:ing)?>", "", text, flags=re.DOTALL | re.IGNORECASE)
     return text.strip()
 class _ThinkStrippedLLM(ChatOpenAI):
     def _create_chat_result(self, response, generation_info=None) -> ChatResult:
         result: ChatResult = super()._create_chat_result(response, generation_info)
         cleaned = []
         return ChatResult(generations=cleaned, llm_output=result.llm_output)
+def _get_llm():
+    return _ThinkStrippedLLM(
+        model=os.getenv("OPENAI_MODEL", "qwen/qwen3-32b"),
+        openai_api_key=os.getenv("OPENAI_API_KEY"),
+        openai_api_base=os.getenv("OPENAI_BASE_URL"),
+        temperature=0,
+    )
+def _get_graph():
+    global _graph
+    if _graph is None:
+        _graph = Neo4jGraph(
+            url=os.getenv("NEO4J_URI", "bolt://127.0.0.1:7687"),
+            username=os.getenv("NEO4J_USERNAME", "neo4j"),
+            password=os.getenv("NEO4J_PASSWORD", "clinicalmatch2024"),
+            database=os.getenv("NEO4J_DATABASE", "neo4j"),
+        )
+    return _graph
 _CYPHER_GENERATION_TEMPLATE = """You are an expert Neo4j Cypher query writer for a clinical trial matching system.
 - (Trial)-[:LOCATED_AT]->(StudySite)
 Rules:
+- For biomarker lookups, use the `id` property with uppercase underscore format, e.g. `{{id: 'HER2_POS'}}`
 - For condition lookups on Trial nodes, use lowercase: `t.condition = 'breast cancer'`
 - Always use relationship pattern (Patient)-[:ELIGIBLE_FOR]->(Trial) to find eligible patients
 - Limit results to 25 unless asked for more
     template=_CYPHER_GENERATION_TEMPLATE,
 )
+def _get_chain():
+    global _graph_chain
+    if _graph_chain is None:
+        _graph_chain = GraphCypherQAChain.from_llm(
+            llm=_get_llm(),
+            graph=_get_graph(),
+            verbose=True,
+            allow_dangerous_requests=True,
+            cypher_prompt=_CYPHER_PROMPT,
+        )
+    return _graph_chain
 def retrieve_patient_trial_matches(patient_id: str) -> list:
     try:
+        return _get_graph().query(f"""
+            MATCH (p:Patient {{id: '{patient_id}'}})-[:HAS_DIAGNOSIS]->(d:Diagnosis)-[:ELIGIBLE_FOR]->(t:Trial)
+            RETURN p.id as patient, d.name as diagnosis, t.id as trial, t.phase as phase, t.condition as condition
+        """)
     except Exception as e:
         print(f"[graphrag] query error: {e}")
         return []
 def rag_query(question: str) -> str:
     try:
+        result = _get_chain().run(question)
         return _strip_thinking(result) if result else "No results found."
     except Exception as e:
         err = str(e)
         if "<think>" in err or "SyntaxError" in err:
+            return "The query model returned unexpected output. Please rephrase your question."
         return f"Graph query error: {err}"
 def get_graph_stats() -> dict:
     try:
+        result = _get_graph().query("""
             MATCH (p:Patient) WITH count(p) as patients
             MATCH (t:Trial) WITH patients, count(t) as trials
             MATCH (d:Diagnosis) WITH patients, trials, count(d) as diagnoses