# chat/reasoning_engine.py
"""

Advanced reasoning engine for deep methodological and clinical analysis

Provides evidence-based reasoning and quality assessment

"""

from typing import List, Dict, Any, Tuple
from llm.llm_provider import GrokLLM
from llm.prompt_templates import MedicalResearchPrompts, ResponseFormatter
import re
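
# Expected paper schema (inferred from how papers are consumed below and from
# the test fixtures; the field names are assumptions, not a formal contract):
#   {
#       "title": str,
#       "authors": List[str],
#       "abstract": str,
#       "source": str,               # e.g. journal name
#       "domain": str,               # e.g. "medical_imaging"
#       "publication_date": str,     # "YYYY-MM-DD"
#   }
# The heuristics below read only "title" and "abstract"; missing keys fall
# back to empty strings via dict.get().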


class ReasoningEngine:
    """

    Advanced reasoning engine for deep research analysis

    Focuses on methodological rigor, evidence quality, and clinical relevance

    """

    def __init__(self, llm=None):
        self.llm = llm or GrokLLM(model="model")  # reuse a shared LLM if provided, otherwise create a default instance
        self.prompts = MedicalResearchPrompts()
        self.formatter = ResponseFormatter()

    def analyze_methodology(self, papers: List[Dict], query: str, domain: str) -> Dict[str, Any]:
        """

        Deep methodological analysis of research papers

        """
        print(f"πŸ”¬ Conducting methodological analysis on {len(papers)} papers")

        if len(papers) > 10:
            print(f"πŸ”„ Focusing on top 10 most relevant papers for deep analysis")
            papers = papers[:10]

        try:
            # Generate methodological reasoning
            methodology_prompt = self.prompts.methodology_reasoning(papers, query, domain)

            response = self.llm.generate(
                methodology_prompt,
                system_message=self.prompts.SYSTEM_MESSAGES["methodology_expert"],
                temperature=0.1,
                max_tokens=2000
            )

            # Extract structured methodological insights
            method_insights = self._extract_methodological_insights(response)
            quality_assessment = self._assess_evidence_quality(papers)
            best_practices = self._identify_best_practices(papers)

            return {
                "methodological_analysis": response,
                "structured_insights": method_insights,
                "evidence_quality": quality_assessment,
                "best_practices": best_practices,
                "query": query,
                "domain": domain,
                "papers_analyzed": len(papers)
            }

        except Exception as e:
            print(f"❌ Methodological analysis error: {e}")
            return self._create_fallback_methodology_analysis(papers, query, domain)

    def analyze_clinical_implications(self, papers: List[Dict], domain: str) -> Dict[str, Any]:
        """

        Analyze clinical implications and translational potential

        """
        print(f"πŸ₯ Analyzing clinical implications of {len(papers)} papers")

        try:
            # Generate clinical implications analysis
            clinical_prompt = self.prompts.clinical_implications(papers, domain)

            response = self.llm.generate(
                clinical_prompt,
                system_message=self.prompts.SYSTEM_MESSAGES["domain_specialist"].format(domain=domain),
                temperature=0.1,
                max_tokens=2000
            )

            # Extract structured clinical insights
            clinical_insights = self._extract_clinical_insights(response)
            translation_readiness = self._assess_translation_readiness(papers)
            implementation_challenges = self._identify_implementation_challenges(papers)

            return {
                "clinical_analysis": response,
                "clinical_insights": clinical_insights,
                "translation_readiness": translation_readiness,
                "implementation_challenges": implementation_challenges,
                "domain": domain,
                "papers_analyzed": len(papers)
            }

        except Exception as e:
            print(f"❌ Clinical implications analysis error: {e}")
            return self._create_fallback_clinical_analysis(papers, domain)

    def _extract_methodological_insights(self, analysis: str) -> List[Dict[str, Any]]:
        """Extract structured methodological insights"""
        insights = []

        # Patterns for different methodological aspects
        patterns = {
            "experimental_design": r"(?:experimental design|study design)[^.!?]*[.!?]",
            "statistical_methods": r"(?:statistical method|analysis approach|statistical test)[^.!?]*[.!?]",
            "validation_strategy": r"(?:validation|cross.validation|external validation)[^.!?]*[.!?]",
            "reproducibility": r"(?:reproducibility|replication|reproducible)[^.!?]*[.!?]",
            "limitations": r"(?:limitation|constraint|challenge)[^.!?]*[.!?]"
        }
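        # Each pattern captures a single sentence: from the keyword match up
        # to the next terminal punctuation mark.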

        for aspect, pattern in patterns.items():
            matches = re.findall(pattern, analysis, re.IGNORECASE)
            for match in matches[:3]:  # Limit to top 3 per aspect
                insights.append({
                    "aspect": aspect.replace('_', ' ').title(),
                    "insight": match.strip(),
                    "confidence": self._assess_insight_confidence(match)
                })

        return insights

    def _assess_evidence_quality(self, papers: List[Dict]) -> Dict[str, Any]:
        """Assess overall evidence quality across papers"""
        quality_metrics = {
            "sample_sizes": [],
            "validation_mentioned": 0,
            "limitations_discussed": 0,
            "comparison_baselines": 0,
            "statistical_significance": 0
        }

        for paper in papers:
            abstract = paper.get('abstract', '').lower()

            # Sample size extraction (simplified)
            sample_matches = re.findall(r'(\d+)\s*(?:patient|subject|sample|case)', abstract)
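            # NOTE: heuristic only; it misses phrasings like "n=1000" and
            # mis-parses comma-grouped numbers such as "1,000 patients".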
            if sample_matches:
                quality_metrics["sample_sizes"].extend([int(match) for match in sample_matches])

            # Methodological quality indicators
            if any(term in abstract for term in ['validation', 'cross-validation', 'external validation']):
                quality_metrics["validation_mentioned"] += 1

            if any(term in abstract for term in ['limitation', 'constraint', 'challenge']):
                quality_metrics["limitations_discussed"] += 1

            if any(term in abstract for term in ['compared to', 'baseline', 'versus']):
                quality_metrics["comparison_baselines"] += 1

            if any(term in abstract for term in ['p value', 'p-value', 'p<', 'statistical significance', 'confidence interval']):
                quality_metrics["statistical_significance"] += 1

        # Calculate overall quality score as a weighted percentage; weights
        # sum to 1.0 (validation 0.3, limitations 0.2, baselines 0.25,
        # statistical significance 0.25).
        total_papers = max(len(papers), 1)  # guard against an empty paper list
        quality_score = (
            (quality_metrics["validation_mentioned"] / total_papers * 0.3) +
            (quality_metrics["limitations_discussed"] / total_papers * 0.2) +
            (quality_metrics["comparison_baselines"] / total_papers * 0.25) +
            (quality_metrics["statistical_significance"] / total_papers * 0.25)
        ) * 100
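        # Worked example (hypothetical): with 2 papers where both mention
        # validation, one discusses limitations, both compare baselines, and
        # one reports significance:
        #   (2/2*0.3 + 1/2*0.2 + 2/2*0.25 + 1/2*0.25) * 100 = 77.5 -> "high"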

        # Sample size analysis
        sample_sizes = quality_metrics["sample_sizes"]
        avg_sample_size = sum(sample_sizes) / len(sample_sizes) if sample_sizes else 0

        return {
            "overall_quality_score": round(quality_score, 1),
            "quality_level": "high" if quality_score > 70 else "medium" if quality_score > 40 else "low",
            "sample_size_analysis": {
                "average_sample_size": round(avg_sample_size),
                "papers_with_sample_info": len(sample_sizes),
                "sample_size_range": f"{min(sample_sizes)}-{max(sample_sizes)}" if sample_sizes else "N/A"
            },
            "methodological_indicators": quality_metrics
        }

    def _identify_best_practices(self, papers: List[Dict]) -> List[Dict[str, Any]]:
        """Identify emerging best practices from the literature"""
        best_practices = []

        practice_categories = {
            "validation": ["cross-validation", "external validation", "independent test set"],
            "transparency": ["open source", "code available", "model sharing"],
            "reproducibility": ["reproducible", "replication", "detailed methods"],
            "ethical_considerations": ["ethical", "bias assessment", "fairness"],
            "clinical_relevance": ["clinical utility", "patient outcomes", "clinical impact"]
        }

        for paper in papers:
            abstract = paper.get('abstract', '').lower()
            title = paper.get('title', '').lower()

            for category, keywords in practice_categories.items():
                if any(keyword in abstract or keyword in title for keyword in keywords):
                    # Compare against the display name used when the practice
                    # is stored, so repeat matches increment the count.
                    practice_name = category.replace('_', ' ').title()
                    existing_practice = next((p for p in best_practices if p['practice'] == practice_name), None)

                    if existing_practice:
                        existing_practice['supporting_papers'] += 1
                        existing_practice['examples'].append(paper.get('title', 'Unknown'))
                    else:
                        best_practices.append({
                            "practice": practice_name,
                            "description": self._get_practice_description(category),
                            "supporting_papers": 1,
                            "examples": [paper.get('title', 'Unknown')]
                        })

        # Sort by number of supporting papers
        best_practices.sort(key=lambda x: x['supporting_papers'], reverse=True)

        return best_practices[:10]  # Return top 10 best practices

    def _get_practice_description(self, practice_category: str) -> str:
        """Get description for best practice categories"""
        descriptions = {
            "validation": "Rigorous validation methods including cross-validation and external testing",
            "transparency": "Open sharing of code, data, and methodologies",
            "reproducibility": "Detailed methods enabling study replication",
            "ethical_considerations": "Consideration of ethical implications and bias assessment",
            "clinical_relevance": "Focus on clinically meaningful outcomes and applications"
        }
        return descriptions.get(practice_category, "Emerging best practice in the field")

    def _extract_clinical_insights(self, analysis: str) -> List[Dict[str, Any]]:
        """Extract structured clinical insights"""
        insights = []

        # Patterns for clinical insights
        patterns = {
            "immediate_applications": r"(?:immediate application|ready for use|clinical implementation)[^.!?]*[.!?]",
            "future_directions": r"(?:future direction|further research|next steps)[^.!?]*[.!?]",
            "patient_impact": r"(?:patient impact|clinical benefit|patient outcomes)[^.!?]*[.!?]",
            "implementation_challenges": r"(?:implementation challenge|adoption barrier|clinical integration)[^.!?]*[.!?]"
        }

        for aspect, pattern in patterns.items():
            matches = re.findall(pattern, analysis, re.IGNORECASE)
            for match in matches[:2]:  # Limit to top 2 per aspect
                insights.append({
                    "aspect": aspect.replace('_', ' ').title(),
                    "insight": match.strip(),
                    "readiness": self._assess_clinical_readiness(match)
                })

        return insights

    def _assess_translation_readiness(self, papers: List[Dict]) -> Dict[str, Any]:
        """Assess translational readiness of research findings"""
        readiness_indicators = {
            "clinical_trial_mentions": 0,
            "patient_outcomes": 0,
            "real_world_data": 0,
            "implementation_studies": 0,
            "guideline_references": 0
        }

        for paper in papers:
            abstract = paper.get('abstract', '').lower()
            title = paper.get('title', '').lower()

            if any(term in abstract or term in title for term in ['clinical trial', 'randomized', 'controlled study']):
                readiness_indicators["clinical_trial_mentions"] += 1

            if any(term in abstract for term in ['patient outcome', 'survival', 'mortality', 'quality of life']):
                readiness_indicators["patient_outcomes"] += 1

            if any(term in abstract for term in ['real world', 'clinical practice', 'routine care']):
                readiness_indicators["real_world_data"] += 1

            if any(term in abstract for term in ['implementation', 'adoption', 'integration']):
                readiness_indicators["implementation_studies"] += 1

            if any(term in abstract for term in ['guideline', 'recommendation', 'standard of care']):
                readiness_indicators["guideline_references"] += 1

        # Normalize by papers * 5 indicators, so a corpus hitting every
        # indicator in every paper scores 100; guard against an empty list.
        total_papers = max(len(papers), 1)
        readiness_score = sum(readiness_indicators.values()) / (total_papers * 5) * 100
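        # Example: 2 papers hitting 4 of their 10 indicator slots give
        # 4 / (2 * 5) * 100 = 40.0 -> "medium".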

        return {
            "readiness_score": round(readiness_score, 1),
            "readiness_level": "high" if readiness_score > 60 else "medium" if readiness_score > 30 else "low",
            "indicators": readiness_indicators
        }

    def _identify_implementation_challenges(self, papers: List[Dict]) -> List[str]:
        """Identify common implementation challenges"""
        challenges = []
        challenge_keywords = [
            'cost', 'expensive', 'infrastructure', 'expertise', 'training',
            'regulatory', 'fda', 'approval', 'validation', 'integration',
            'workflow', 'adoption', 'resistance', 'change management'
        ]

        for paper in papers:
            abstract = paper.get('abstract', '').lower()

            for keyword in challenge_keywords:
                if keyword in abstract and keyword not in challenges:
                    challenges.append(keyword)

        return challenges[:10]  # Return top 10 challenges

    def _assess_insight_confidence(self, insight: str) -> str:
        """Assess confidence level of an insight"""
        insight_lower = insight.lower()

        if any(term in insight_lower for term in ['clearly', 'definitely', 'strongly', 'convincingly']):
            return "high"
        elif any(term in insight_lower for term in ['suggest', 'indicate', 'likely', 'probably']):
            return "medium"
        else:
            return "low"

    def _assess_clinical_readiness(self, insight: str) -> str:
        """Assess clinical readiness level"""
        insight_lower = insight.lower()

        if any(term in insight_lower for term in ['immediate', 'ready', 'current', 'now']):
            return "immediate"
        elif any(term in insight_lower for term in ['near future', 'soon', 'emerging']):
            return "near_term"
        else:
            return "long_term"

    def _create_fallback_methodology_analysis(self, papers: List[Dict], query: str, domain: str) -> Dict[str, Any]:
        """Create basic methodology analysis when LLM fails"""
        quality_assessment = self._assess_evidence_quality(papers)
        best_practices = self._identify_best_practices(papers)

        basic_analysis = f"""

        Basic Methodological Analysis for: {query}

        Domain: {domain}

        Papers Analyzed: {len(papers)}



        Evidence Quality: {quality_assessment['quality_level']} ({quality_assessment['overall_quality_score']}/100)

        Best Practices Identified: {len(best_practices)}



        Note: Detailed methodological reasoning unavailable.

        """

        return {
            "methodological_analysis": basic_analysis,
            "structured_insights": [],
            "evidence_quality": quality_assessment,
            "best_practices": best_practices,
            "query": query,
            "domain": domain,
            "papers_analyzed": len(papers),
            "fallback_used": True
        }

    def _create_fallback_clinical_analysis(self, papers: List[Dict], domain: str) -> Dict[str, Any]:
        """Create basic clinical analysis when LLM fails"""
        translation_readiness = self._assess_translation_readiness(papers)
        implementation_challenges = self._identify_implementation_challenges(papers)

        basic_analysis = f"""

        Basic Clinical Implications Analysis for: {domain}

        Papers Analyzed: {len(papers)}



        Translation Readiness: {translation_readiness['readiness_level']} ({translation_readiness['readiness_score']}/100)

        Implementation Challenges: {len(implementation_challenges)}



        Note: Detailed clinical analysis unavailable.

        """

        return {
            "clinical_analysis": basic_analysis,
            "clinical_insights": [],
            "translation_readiness": translation_readiness,
            "implementation_challenges": implementation_challenges,
            "domain": domain,
            "papers_analyzed": len(papers),
            "fallback_used": True
        }
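

# Minimal stub LLM for offline smoke-testing. This is an assumption: it only
# mirrors the generate() call signature used above (prompt, system_message,
# temperature, max_tokens), not the real GrokLLM interface.
class _StubLLM:
    """Return canned text so the extraction heuristics run without a live API."""

    def generate(self, prompt, system_message=None, temperature=0.0, max_tokens=0):
        # Canned response seeded with phrases the regex extractors look for
        # (experimental design, validation, limitation, immediate application,
        # implementation challenges).
        return (
            "The experimental design clearly supports the main claims. "
            "Cross-validation and external validation suggest the models are robust. "
            "A key limitation is the single-center design. "
            "Immediate application in screening workflows is likely, and "
            "implementation challenges around clinical integration remain."
        )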


# Quick test
def test_reasoning_engine():
    """Test the reasoning engine"""
    print("πŸ§ͺ Testing Reasoning Engine")
    print("=" * 50)

    test_papers = [
        {
            'title': 'Randomized Trial of AI Diagnostic Tool',
            'authors': ['Smith J', 'Johnson A'],
            'abstract': 'Randomized controlled trial of 1000 patients comparing AI diagnostic tool with radiologist interpretation. The AI system showed non-inferiority with 94% accuracy vs 92% for radiologists (p<0.05). Limitations include single-center design.',
            'source': 'NEJM',
            'domain': 'medical_imaging',
            'publication_date': '2024-01-15'
        },
        {
            'title': 'Deep Learning for Early Cancer Detection',
            'authors': ['Lee K', 'Chen R'],
            'abstract': 'Prospective study applying deep learning to screening mammography in 50,000 patients. The model achieved AUC of 0.95 for early cancer detection. External validation performed on independent dataset.',
            'source': 'JAMA',
            'domain': 'medical_imaging',
            'publication_date': '2024-02-20'
        }
    ]

    engine = ReasoningEngine(llm=_StubLLM())  # stub keeps the smoke test runnable offline

    try:
        # Test methodological analysis
        method_analysis = engine.analyze_methodology(
            test_papers,
            "AI diagnostic accuracy in medical imaging",
            "medical_imaging"
        )

        print(f"βœ… Methodological analysis completed")
        print(f"πŸ“Š Evidence quality: {method_analysis['evidence_quality']['quality_level']}")
        print(f"πŸ’‘ Best practices identified: {len(method_analysis['best_practices'])}")

        # Test clinical implications
        clinical_analysis = engine.analyze_clinical_implications(test_papers, "medical_imaging")

        print(f"βœ… Clinical analysis completed")
        print(f"πŸ₯ Translation readiness: {clinical_analysis['translation_readiness']['readiness_level']}")
        print(f"🚧 Implementation challenges: {len(clinical_analysis['implementation_challenges'])}")

    except Exception as e:
        print(f"❌ Reasoning engine test failed: {e}")


if __name__ == "__main__":
    test_reasoning_engine()