Spaces:
Running
Running
| # chat/reasoning_engine.py | |
| """ | |
| Advanced reasoning engine for deep methodological and clinical analysis | |
| Provides evidence-based reasoning and quality assessment | |
| """ | |
| from typing import List, Dict, Any, Tuple | |
| from llm.llm_provider import GrokLLM | |
| from llm.prompt_templates import MedicalResearchPrompts, ResponseFormatter | |
| import re | |
class ReasoningEngine:
    """
    Advanced reasoning engine for deep research analysis.

    Focuses on methodological rigor, evidence quality, and clinical relevance.
    The two public entry points (``analyze_methodology`` and
    ``analyze_clinical_implications``) combine an LLM-generated narrative with
    keyword heuristics computed over paper titles/abstracts.  When the LLM
    step raises, a heuristic-only fallback result is returned instead
    (flagged with ``"fallback_used": True``).

    Papers are plain dicts; only the ``'title'`` and ``'abstract'`` keys are
    read by the heuristics, and a missing or ``None`` value is treated as an
    empty string.
    """

    # Upper bound on the number of papers sent for deep methodological analysis.
    MAX_PAPERS_FOR_DEEP_ANALYSIS = 10

    # Upper bound on the example titles recorded per best practice.
    MAX_PRACTICE_EXAMPLES = 3

    # A count followed by a cohort noun.  The first alternative accepts
    # thousands separators so "50,000 patients" is read as 50000, not 0.
    _SAMPLE_SIZE_RE = re.compile(
        r'(\d{1,3}(?:,\d{3})+|\d+)\s*(?:patient|subject|sample|case)'
    )

    # 'now' must be a whole word; as a substring it also hits "unknown",
    # "knowledge", etc.
    _NOW_RE = re.compile(r'\bnow\b')

    def __init__(self, llm=None):
        """
        Args:
            llm: Optional shared LLM client exposing ``generate(...)``.
                A ``GrokLLM`` is created when none (or a falsy value) is given.
        """
        self.llm = llm or GrokLLM(model="model")  # Use shared LLM
        self.prompts = MedicalResearchPrompts()
        self.formatter = ResponseFormatter()

    @staticmethod
    def _lowered(paper: Dict, field: str) -> str:
        """Return ``paper[field]`` lower-cased; missing/``None`` becomes ``''``."""
        return (paper.get(field) or '').lower()

    def analyze_methodology(self, papers: List[Dict], query: str, domain: str) -> Dict[str, Any]:
        """
        Deep methodological analysis of research papers.

        Args:
            papers: Paper dicts; only the first
                ``MAX_PAPERS_FOR_DEEP_ANALYSIS`` are analyzed.
            query: The research question, passed to the prompt builder.
            domain: The research domain, passed to the prompt builder.

        Returns:
            Dict with the LLM narrative (``methodological_analysis``),
            regex-extracted ``structured_insights``, heuristic
            ``evidence_quality`` and ``best_practices``, plus ``query``,
            ``domain`` and ``papers_analyzed``.  If any step raises, the
            heuristic-only fallback dict is returned instead.
        """
        print(f"π¬ Conducting methodological analysis on {len(papers)} papers")
        limit = self.MAX_PAPERS_FOR_DEEP_ANALYSIS
        if len(papers) > limit:
            print(f"π Focusing on top {limit} most relevant papers for deep analysis")
            papers = papers[:limit]
        try:
            # Generate methodological reasoning
            methodology_prompt = self.prompts.methodology_reasoning(papers, query, domain)
            response = self.llm.generate(
                methodology_prompt,
                system_message=self.prompts.SYSTEM_MESSAGES["methodology_expert"],
                temperature=0.1,
                max_tokens=2000
            )
            # Extract structured methodological insights
            return {
                "methodological_analysis": response,
                "structured_insights": self._extract_methodological_insights(response),
                "evidence_quality": self._assess_evidence_quality(papers),
                "best_practices": self._identify_best_practices(papers),
                "query": query,
                "domain": domain,
                "papers_analyzed": len(papers)
            }
        except Exception as e:
            # Deliberate best-effort boundary: degrade to heuristics only.
            print(f"β Methodological analysis error: {e}")
            return self._create_fallback_methodology_analysis(papers, query, domain)

    def analyze_clinical_implications(self, papers: List[Dict], domain: str) -> Dict[str, Any]:
        """
        Analyze clinical implications and translational potential.

        Args:
            papers: Paper dicts to analyze (no truncation is applied here).
            domain: The research domain; also formatted into the
                ``domain_specialist`` system message.

        Returns:
            Dict with the LLM narrative (``clinical_analysis``),
            regex-extracted ``clinical_insights``, heuristic
            ``translation_readiness`` and ``implementation_challenges``, plus
            ``domain`` and ``papers_analyzed``.  If any step raises, the
            heuristic-only fallback dict is returned instead.
        """
        print(f"π₯ Analyzing clinical implications of {len(papers)} papers")
        try:
            # Generate clinical implications analysis
            clinical_prompt = self.prompts.clinical_implications(papers, domain)
            response = self.llm.generate(
                clinical_prompt,
                system_message=self.prompts.SYSTEM_MESSAGES["domain_specialist"].format(domain=domain),
                temperature=0.1,
                max_tokens=2000
            )
            # Extract structured clinical insights
            return {
                "clinical_analysis": response,
                "clinical_insights": self._extract_clinical_insights(response),
                "translation_readiness": self._assess_translation_readiness(papers),
                "implementation_challenges": self._identify_implementation_challenges(papers),
                "domain": domain,
                "papers_analyzed": len(papers)
            }
        except Exception as e:
            # Deliberate best-effort boundary: degrade to heuristics only.
            print(f"β Clinical implications analysis error: {e}")
            return self._create_fallback_clinical_analysis(papers, domain)

    def _extract_methodological_insights(self, analysis: str) -> List[Dict[str, Any]]:
        """
        Extract structured methodological insights.

        Each pattern captures text from a trigger phrase up to the next
        ``.``, ``!`` or ``?``; at most three matches are kept per aspect.
        """
        # Patterns for different methodological aspects
        patterns = {
            "experimental_design": r"(?:experimental design|study design)[^.!?]*[.!?]",
            "statistical_methods": r"(?:statistical method|analysis approach|statistical test)[^.!?]*[.!?]",
            "validation_strategy": r"(?:validation|cross.validation|external validation)[^.!?]*[.!?]",
            "reproducibility": r"(?:reproducibility|replication|reproducible)[^.!?]*[.!?]",
            "limitations": r"(?:limitation|constraint|challenge)[^.!?]*[.!?]"
        }
        insights = []
        for aspect, pattern in patterns.items():
            label = aspect.replace('_', ' ').title()
            # Limit to top 3 per aspect
            for match in re.findall(pattern, analysis, re.IGNORECASE)[:3]:
                insights.append({
                    "aspect": label,
                    "insight": match.strip(),
                    "confidence": self._assess_insight_confidence(match)
                })
        return insights

    def _assess_evidence_quality(self, papers: List[Dict]) -> Dict[str, Any]:
        """
        Assess overall evidence quality across papers.

        The score is a weighted share (0-100) of papers whose abstract
        mentions validation (0.3), limitations (0.2), a comparison baseline
        (0.25) and statistical significance (0.25).  An empty ``papers`` list
        yields a score of 0.0 ("low") instead of dividing by zero.

        Returns:
            Dict with ``overall_quality_score``, ``quality_level``
            ("high" > 70, "medium" > 40, else "low"),
            ``sample_size_analysis`` and the raw ``methodological_indicators``.
        """
        quality_metrics = {
            "sample_sizes": [],
            "validation_mentioned": 0,
            "limitations_discussed": 0,
            "comparison_baselines": 0,
            "statistical_significance": 0
        }
        # Methodological quality indicators: metric name -> trigger terms
        indicator_terms = {
            "validation_mentioned": ['validation', 'cross-validation', 'external validation'],
            "limitations_discussed": ['limitation', 'constraint', 'challenge'],
            "comparison_baselines": ['compared to', 'baseline', 'versus'],
            "statistical_significance": ['p value', 'p<', 'statistical significance', 'confidence interval'],
        }
        papers_with_sample_info = 0
        for paper in papers:
            abstract = self._lowered(paper, 'abstract')
            # Sample size extraction (simplified)
            sample_matches = self._SAMPLE_SIZE_RE.findall(abstract)
            if sample_matches:
                # Counted per paper, not per extracted number.
                papers_with_sample_info += 1
                quality_metrics["sample_sizes"].extend(
                    int(match.replace(',', '')) for match in sample_matches
                )
            for metric, terms in indicator_terms.items():
                if any(term in abstract for term in terms):
                    quality_metrics[metric] += 1

        # Calculate overall quality score (guard against an empty paper list)
        total_papers = len(papers)
        if total_papers:
            quality_score = (
                (quality_metrics["validation_mentioned"] / total_papers * 0.3) +
                (quality_metrics["limitations_discussed"] / total_papers * 0.2) +
                (quality_metrics["comparison_baselines"] / total_papers * 0.25) +
                (quality_metrics["statistical_significance"] / total_papers * 0.25)
            ) * 100
        else:
            quality_score = 0.0

        # Sample size analysis
        sample_sizes = quality_metrics["sample_sizes"]
        avg_sample_size = sum(sample_sizes) / len(sample_sizes) if sample_sizes else 0
        return {
            "overall_quality_score": round(quality_score, 1),
            "quality_level": "high" if quality_score > 70 else "medium" if quality_score > 40 else "low",
            "sample_size_analysis": {
                "average_sample_size": round(avg_sample_size),
                "papers_with_sample_info": papers_with_sample_info,
                "sample_size_range": f"{min(sample_sizes)}-{max(sample_sizes)}" if sample_sizes else "N/A"
            },
            "methodological_indicators": quality_metrics
        }

    def _identify_best_practices(self, papers: List[Dict]) -> List[Dict[str, Any]]:
        """
        Identify emerging best practices from the literature.

        A paper supports a practice when any of the practice's keywords
        appears in its title or abstract.  Each practice appears once in the
        result, with ``supporting_papers`` counting every matching paper and
        ``examples`` holding up to ``MAX_PRACTICE_EXAMPLES`` titles.

        Returns:
            Up to 10 practice dicts, most-supported first (ties keep
            first-seen order).
        """
        practice_categories = {
            "validation": ["cross-validation", "external validation", "independent test set"],
            "transparency": ["open source", "code available", "model sharing"],
            "reproducibility": ["reproducible", "replication", "detailed methods"],
            "ethical_considerations": ["ethical", "bias assessment", "fairness"],
            "clinical_relevance": ["clinical utility", "patient outcomes", "clinical impact"]
        }
        # Keyed by the raw category so repeat hits update the same record.
        # (Comparing the raw key to the title-cased display name never matches.)
        recorded = {}
        for paper in papers:
            abstract = self._lowered(paper, 'abstract')
            title = self._lowered(paper, 'title')
            example_title = paper.get('title') or 'Unknown'
            for category, keywords in practice_categories.items():
                if not any(keyword in abstract or keyword in title for keyword in keywords):
                    continue
                entry = recorded.get(category)
                if entry is None:
                    recorded[category] = {
                        "practice": category.replace('_', ' ').title(),
                        "description": self._get_practice_description(category),
                        "supporting_papers": 1,
                        "examples": [example_title]
                    }
                else:
                    entry['supporting_papers'] += 1
                    if len(entry['examples']) < self.MAX_PRACTICE_EXAMPLES:
                        entry['examples'].append(example_title)
        # Sort by number of supporting papers (stable, so ties keep discovery order)
        ranked = sorted(recorded.values(), key=lambda x: x['supporting_papers'], reverse=True)
        return ranked[:10]  # Return top 10 best practices

    def _get_practice_description(self, practice_category: str) -> str:
        """Get description for best practice categories (generic text for unknown keys)."""
        descriptions = {
            "validation": "Rigorous validation methods including cross-validation and external testing",
            "transparency": "Open sharing of code, data, and methodologies",
            "reproducibility": "Detailed methods enabling study replication",
            "ethical_considerations": "Consideration of ethical implications and bias assessment",
            "clinical_relevance": "Focus on clinically meaningful outcomes and applications"
        }
        return descriptions.get(practice_category, "Emerging best practice in the field")

    def _extract_clinical_insights(self, analysis: str) -> List[Dict[str, Any]]:
        """
        Extract structured clinical insights.

        Each pattern captures text from a trigger phrase up to the next
        ``.``, ``!`` or ``?``; at most two matches are kept per aspect.
        """
        # Patterns for clinical insights
        patterns = {
            "immediate_applications": r"(?:immediate application|ready for use|clinical implementation)[^.!?]*[.!?]",
            "future_directions": r"(?:future direction|further research|next steps)[^.!?]*[.!?]",
            "patient_impact": r"(?:patient impact|clinical benefit|patient outcomes)[^.!?]*[.!?]",
            "implementation_challenges": r"(?:implementation challenge|adoption barrier|clinical integration)[^.!?]*[.!?]"
        }
        insights = []
        for aspect, pattern in patterns.items():
            label = aspect.replace('_', ' ').title()
            # Limit to top 2 per aspect
            for match in re.findall(pattern, analysis, re.IGNORECASE)[:2]:
                insights.append({
                    "aspect": label,
                    "insight": match.strip(),
                    "readiness": self._assess_clinical_readiness(match)
                })
        return insights

    def _assess_translation_readiness(self, papers: List[Dict]) -> Dict[str, Any]:
        """
        Assess translational readiness of research findings.

        Counts, per indicator, the papers whose abstract (and, for clinical
        trials only, title) contains a trigger term.  The score is the share
        of the ``5 * len(papers)`` possible hits, as a percentage.  An empty
        ``papers`` list yields a score of 0.0 ("low") instead of dividing by
        zero.

        Returns:
            Dict with ``readiness_score``, ``readiness_level``
            ("high" > 60, "medium" > 30, else "low") and the raw
            ``indicators`` counts.
        """
        # (indicator, trigger terms, whether the title is searched as well)
        indicator_rules = [
            ("clinical_trial_mentions", ['clinical trial', 'randomized', 'controlled study'], True),
            ("patient_outcomes", ['patient outcome', 'survival', 'mortality', 'quality of life'], False),
            ("real_world_data", ['real world', 'clinical practice', 'routine care'], False),
            ("implementation_studies", ['implementation', 'adoption', 'integration'], False),
            ("guideline_references", ['guideline', 'recommendation', 'standard of care'], False),
        ]
        readiness_indicators = {name: 0 for name, _, _ in indicator_rules}
        for paper in papers:
            abstract = self._lowered(paper, 'abstract')
            title = self._lowered(paper, 'title')
            for name, terms, include_title in indicator_rules:
                if any(term in abstract or (include_title and term in title) for term in terms):
                    readiness_indicators[name] += 1

        total_papers = len(papers)
        if total_papers:
            readiness_score = (
                sum(readiness_indicators.values()) / (total_papers * len(indicator_rules)) * 100
            )
        else:
            readiness_score = 0.0
        return {
            "readiness_score": round(readiness_score, 1),
            "readiness_level": "high" if readiness_score > 60 else "medium" if readiness_score > 30 else "low",
            "indicators": readiness_indicators
        }

    def _identify_implementation_challenges(self, papers: List[Dict]) -> List[str]:
        """
        Identify common implementation challenges.

        Returns the challenge keywords found in any abstract, de-duplicated,
        in order of first discovery, capped at 10.
        """
        challenge_keywords = [
            'cost', 'expensive', 'infrastructure', 'expertise', 'training',
            'regulatory', 'fda', 'approval', 'validation', 'integration',
            'workflow', 'adoption', 'resistance', 'change management'
        ]
        challenges = []
        for paper in papers:
            abstract = self._lowered(paper, 'abstract')
            for keyword in challenge_keywords:
                if keyword in abstract and keyword not in challenges:
                    challenges.append(keyword)
        return challenges[:10]  # Return top 10 challenges

    def _assess_insight_confidence(self, insight: str) -> str:
        """Assess confidence level of an insight from its hedging vocabulary."""
        insight_lower = insight.lower()
        if any(term in insight_lower for term in ['clearly', 'definitely', 'strongly', 'convincingly']):
            return "high"
        if any(term in insight_lower for term in ['suggest', 'indicate', 'likely', 'probably']):
            return "medium"
        return "low"

    def _assess_clinical_readiness(self, insight: str) -> str:
        """
        Assess clinical readiness level.

        Returns "immediate", "near_term" or "long_term" based on timing
        vocabulary in ``insight``.
        """
        insight_lower = insight.lower()
        if (
            any(term in insight_lower for term in ['immediate', 'ready', 'current'])
            # Whole-word match only: "unknown"/"knowledge" must not count as "now".
            or self._NOW_RE.search(insight_lower)
        ):
            return "immediate"
        if any(term in insight_lower for term in ['near future', 'soon', 'emerging']):
            return "near_term"
        return "long_term"

    def _create_fallback_methodology_analysis(self, papers: List[Dict], query: str, domain: str) -> Dict[str, Any]:
        """
        Create basic methodology analysis when LLM fails.

        Uses only the keyword heuristics; the result has the same keys as the
        normal result plus ``"fallback_used": True`` and no structured
        insights.
        """
        quality_assessment = self._assess_evidence_quality(papers)
        best_practices = self._identify_best_practices(papers)
        basic_analysis = "\n".join([
            "",
            f"Basic Methodological Analysis for: {query}",
            f"Domain: {domain}",
            f"Papers Analyzed: {len(papers)}",
            f"Evidence Quality: {quality_assessment['quality_level']} ({quality_assessment['overall_quality_score']}/100)",
            f"Best Practices Identified: {len(best_practices)}",
            "Note: Detailed methodological reasoning unavailable.",
            "",
        ])
        return {
            "methodological_analysis": basic_analysis,
            "structured_insights": [],
            "evidence_quality": quality_assessment,
            "best_practices": best_practices,
            "query": query,
            "domain": domain,
            "papers_analyzed": len(papers),
            "fallback_used": True
        }

    def _create_fallback_clinical_analysis(self, papers: List[Dict], domain: str) -> Dict[str, Any]:
        """
        Create basic clinical analysis when LLM fails.

        Uses only the keyword heuristics; the result has the same keys as the
        normal result plus ``"fallback_used": True`` and no clinical insights.
        """
        translation_readiness = self._assess_translation_readiness(papers)
        implementation_challenges = self._identify_implementation_challenges(papers)
        basic_analysis = "\n".join([
            "",
            f"Basic Clinical Implications Analysis for: {domain}",
            f"Papers Analyzed: {len(papers)}",
            f"Translation Readiness: {translation_readiness['readiness_level']} ({translation_readiness['readiness_score']}/100)",
            f"Implementation Challenges: {len(implementation_challenges)}",
            "Note: Detailed clinical analysis unavailable.",
            "",
        ])
        return {
            "clinical_analysis": basic_analysis,
            "clinical_insights": [],
            "translation_readiness": translation_readiness,
            "implementation_challenges": implementation_challenges,
            "domain": domain,
            "papers_analyzed": len(papers),
            "fallback_used": True
        }
# Quick test
def test_reasoning_engine():
    """Smoke-test the reasoning engine end to end on two sample papers.

    Runs the methodological and the clinical analysis in turn and prints a
    short summary of each; any exception raised by the analyses is reported
    on stdout rather than propagated.
    """
    print("π§ͺ Testing Reasoning Engine")
    print("=" * 50)

    randomized_trial = {
        'title': 'Randomized Trial of AI Diagnostic Tool',
        'authors': ['Smith J', 'Johnson A'],
        'abstract': 'Randomized controlled trial of 1000 patients comparing AI diagnostic tool with radiologist interpretation. The AI system showed non-inferiority with 94% accuracy vs 92% for radiologists (p<0.05). Limitations include single-center design.',
        'source': 'NEJM',
        'domain': 'medical_imaging',
        'publication_date': '2024-01-15'
    }
    cancer_screening = {
        'title': 'Deep Learning for Early Cancer Detection',
        'authors': ['Lee K', 'Chen R'],
        'abstract': 'Prospective study applying deep learning to screening mammography in 50,000 patients. The model achieved AUC of 0.95 for early cancer detection. External validation performed on independent dataset.',
        'source': 'JAMA',
        'domain': 'medical_imaging',
        'publication_date': '2024-02-20'
    }
    sample_papers = [randomized_trial, cancer_screening]

    # Constructed outside the try block, so set-up errors propagate to the caller.
    reasoner = ReasoningEngine()
    try:
        # Test methodological analysis
        methodology = reasoner.analyze_methodology(
            sample_papers,
            "AI diagnostic accuracy in medical imaging",
            "medical_imaging"
        )
        evidence_level = methodology['evidence_quality']['quality_level']
        practice_count = len(methodology['best_practices'])
        print("β Methodological analysis completed")
        print(f"π Evidence quality: {evidence_level}")
        print(f"π‘ Best practices identified: {practice_count}")

        # Test clinical implications
        clinical = reasoner.analyze_clinical_implications(sample_papers, "medical_imaging")
        readiness_level = clinical['translation_readiness']['readiness_level']
        challenge_count = len(clinical['implementation_challenges'])
        print("β Clinical analysis completed")
        print(f"π₯ Translation readiness: {readiness_level}")
        print(f"π§ Implementation challenges: {challenge_count}")
    except Exception as e:
        print(f"β Reasoning engine test failed: {e}")
# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_reasoning_engine()