"""Statistical analysis service using Modal code execution.
This module provides Modal-based statistical analysis WITHOUT depending on
agent_framework. This allows it to be used in the simple orchestrator mode
without requiring the magentic optional dependency.
The AnalysisAgent (in src/agents/) wraps this service for magentic mode.
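
Example (minimal sketch, run inside an async context; assumes Modal credentials
and the LLM model configuration are already set up):

    from src.services.statistical_analyzer import get_statistical_analyzer

    analyzer = get_statistical_analyzer()
    result = await analyzer.analyze(
        query="Does metformin reduce HbA1c in type 2 diabetes?",
        evidence=[],  # list[Evidence] from the retrieval layer
    )
    print(result.verdict, result.confidence)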
"""
import asyncio
import re
from functools import lru_cache, partial
from typing import Any, Literal

from pydantic import BaseModel, Field
from pydantic_ai import Agent

from src.agent_factory.judges import get_model
from src.tools.code_execution import (
    CodeExecutionError,
    get_code_executor,
    get_sandbox_library_prompt,
)
from src.utils.models import Evidence

# Type alias for verdict values
VerdictType = Literal["SUPPORTED", "REFUTED", "INCONCLUSIVE"]


class AnalysisResult(BaseModel):
"""Result of statistical analysis."""
verdict: VerdictType = Field(
description="SUPPORTED, REFUTED, or INCONCLUSIVE",
)
confidence: float = Field(ge=0.0, le=1.0, description="Confidence in verdict (0-1)")
statistical_evidence: str = Field(
description="Summary of statistical findings from code execution"
)
code_generated: str = Field(description="Python code that was executed")
execution_output: str = Field(description="Output from code execution")
key_findings: list[str] = Field(default_factory=list, description="Key takeaways")
limitations: list[str] = Field(default_factory=list, description="Limitations")
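
    # Example instance (illustrative values only):
    #   AnalysisResult(
    #       verdict="SUPPORTED",
    #       confidence=0.90,
    #       statistical_evidence="Welch t-test: t=3.12, p=0.004",
    #       code_generated="from scipy import stats\n...",
    #       execution_output="p-value: 0.0040",
    #       key_findings=["p-value: 0.0040 (significant)"],
    #       limitations=["Analysis based on summary data only"],
    #   )

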
class StatisticalAnalyzer:
"""Performs statistical analysis using Modal code execution.
This service:
1. Generates Python code for statistical analysis using LLM
2. Executes code in Modal sandbox
3. Interprets results
4. Returns verdict (SUPPORTED/REFUTED/INCONCLUSIVE)
Note: This class has NO agent_framework dependency, making it safe
to use in the simple orchestrator without the magentic extra.
"""
def __init__(self) -> None:
"""Initialize the analyzer."""
self._code_executor: Any = None
self._agent: Agent[None, str] | None = None

    def _get_code_executor(self) -> Any:
"""Lazy initialization of code executor."""
if self._code_executor is None:
self._code_executor = get_code_executor()
return self._code_executor

    def _get_agent(self) -> Agent[None, str]:
"""Lazy initialization of LLM agent for code generation."""
if self._agent is None:
library_versions = get_sandbox_library_prompt()
self._agent = Agent(
model=get_model(),
output_type=str,
system_prompt=f"""You are a biomedical data scientist.
Generate Python code to analyze research evidence and test hypotheses.
Guidelines:
1. Use pandas, numpy, scipy.stats for analysis
2. Print clear, interpretable results
3. Include statistical tests (t-tests, chi-square, etc.)
4. Calculate effect sizes and confidence intervals
5. Keep code concise (<50 lines)
6. Set 'result' variable to SUPPORTED, REFUTED, or INCONCLUSIVE
Available libraries:
{library_versions}
Output format: Return ONLY executable Python code, no explanations.""",
)
return self._agent
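
    # Illustrative example of code the agent above might generate (hypothetical
    # model output; the only hard convention is the final 'result' variable):
    #
    #   from scipy import stats
    #   treated = [30.2, 28.5, 31.0]
    #   control = [22.1, 24.3, 23.8]
    #   t, p = stats.ttest_ind(treated, control)
    #   print(f"p-value: {p:.4f}")
    #   result = "SUPPORTED" if p < 0.05 else "INCONCLUSIVE"
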
async def analyze(
self,
query: str,
evidence: list[Evidence],
hypothesis: dict[str, Any] | None = None,
) -> AnalysisResult:
"""Run statistical analysis on evidence.
Args:
query: The research question
evidence: List of Evidence objects to analyze
hypothesis: Optional hypothesis dict with drug, target, pathway, effect
Returns:
AnalysisResult with verdict and statistics
"""
# Build analysis prompt (method handles slicing internally)
evidence_summary = self._summarize_evidence(evidence)
hypothesis_text = ""
if hypothesis:
hypothesis_text = (
f"\nHypothesis: {hypothesis.get('drug', 'Unknown')} → "
f"{hypothesis.get('target', '?')} → "
f"{hypothesis.get('pathway', '?')} → "
f"{hypothesis.get('effect', '?')}\n"
f"Confidence: {hypothesis.get('confidence', 0.5):.0%}\n"
)
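        # Hypothesis dict shape assumed by the .get() calls above (illustrative values):
        #   {"drug": "metformin", "target": "AMPK", "pathway": "gluconeogenesis",
        #    "effect": "lower hepatic glucose output", "confidence": 0.7}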
prompt = f"""Generate Python code to statistically analyze:
**Research Question**: {query}
{hypothesis_text}
**Evidence Summary**:
{evidence_summary}
Generate executable Python code to analyze this evidence."""
try:
# Generate code
agent = self._get_agent()
code_result = await agent.run(prompt)
generated_code = code_result.output
# Execute in Modal sandbox
loop = asyncio.get_running_loop()
executor = self._get_code_executor()
execution = await loop.run_in_executor(
None, partial(executor.execute, generated_code, timeout=120)
)
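            # The executor result is treated as a dict of the shape
            # (inferred from the accesses below, not a documented contract):
            #   {"success": bool, "stdout": str, "stderr": str, "error": str | None}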
if not execution["success"]:
return AnalysisResult(
verdict="INCONCLUSIVE",
confidence=0.0,
statistical_evidence=(
f"Execution failed: {execution.get('error', 'Unknown error')}"
),
code_generated=generated_code,
execution_output=execution.get("stderr", ""),
key_findings=[],
limitations=["Code execution failed"],
)
# Interpret results
return self._interpret_results(generated_code, execution)
except CodeExecutionError as e:
return AnalysisResult(
verdict="INCONCLUSIVE",
confidence=0.0,
statistical_evidence=str(e),
code_generated="",
execution_output="",
key_findings=[],
limitations=[f"Analysis error: {e}"],
)

    def _summarize_evidence(self, evidence: list[Evidence]) -> str:
"""Summarize evidence for code generation prompt."""
if not evidence:
return "No evidence available."
lines = []
for i, ev in enumerate(evidence[:5], 1):
content = ev.content
truncated = content[:200] + ("..." if len(content) > 200 else "")
lines.append(f"{i}. {truncated}")
lines.append(f" Source: {ev.citation.title}")
lines.append(f" Relevance: {ev.relevance:.0%}\n")
return "\n".join(lines)
def _interpret_results(
self,
code: str,
execution: dict[str, Any],
) -> AnalysisResult:
"""Interpret code execution results."""
stdout = execution["stdout"]
stdout_upper = stdout.upper()
# Extract verdict with robust word-boundary matching
verdict: VerdictType = "INCONCLUSIVE"
if re.search(r"\bSUPPORTED\b", stdout_upper) and not re.search(
r"\b(?:NOT|UN)SUPPORTED\b", stdout_upper
):
verdict = "SUPPORTED"
elif re.search(r"\bREFUTED\b", stdout_upper):
verdict = "REFUTED"
# Extract key findings
key_findings = []
for line in stdout.split("\n"):
line_lower = line.lower()
if any(kw in line_lower for kw in ["p-value", "significant", "effect", "mean"]):
key_findings.append(line.strip())
# Calculate confidence from p-values
confidence = self._calculate_confidence(stdout)
return AnalysisResult(
verdict=verdict,
confidence=confidence,
statistical_evidence=stdout.strip(),
code_generated=code,
execution_output=stdout,
key_findings=key_findings[:5],
limitations=[
"Analysis based on summary data only",
"Limited to available evidence",
"Statistical tests assume data independence",
],
)

    def _calculate_confidence(self, output: str) -> float:
"""Calculate confidence based on statistical results."""
        # Also capture scientific notation (e.g. "p-value: 1.2e-05")
        p_values = re.findall(r"p[-\s]?value[:\s]+(\d+\.?\d*(?:e-?\d+)?)", output.lower())
if p_values:
try:
min_p = min(float(p) for p in p_values)
if min_p < 0.001:
return 0.95
elif min_p < 0.01:
return 0.90
elif min_p < 0.05:
return 0.80
else:
return 0.60
except ValueError:
pass
return 0.70 # Default


@lru_cache(maxsize=1)
def get_statistical_analyzer() -> StatisticalAnalyzer:
"""Get or create singleton StatisticalAnalyzer instance (thread-safe via lru_cache)."""
return StatisticalAnalyzer()