File size: 11,007 Bytes
6b731f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7502356
c073e6b
7502356
 
 
 
6b731f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7502356
 
 
6b731f7
 
7502356
6b731f7
 
 
 
 
 
 
 
 
c073e6b
6b731f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7502356
c073e6b
7502356
 
 
6b731f7
 
 
7502356
 
 
6b731f7
 
 
7502356
6b731f7
 
 
 
 
 
 
 
 
 
 
 
c073e6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b731f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7502356
6b731f7
7502356
 
6b731f7
 
 
 
 
 
 
 
 
 
 
 
7502356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
"""
Synthesizer Module - The SYNTHESIZE step of the Reasoning Loop.

Takes all accumulated findings and produces a coherent, narrative answer.

Unlike the old approach (where the LLM's last response WAS the summary),
the Synthesizer deliberately constructs the answer from evidence:
- Connects findings into a coherent story
- Cites evidence for each claim
- Highlights confidence levels
- Notes what wasn't investigated (limitations)
- Produces actionable insights, not just numbers

Architecture:
    FindingsAccumulator β†’ Synthesizer.synthesize() β†’ Markdown narrative
"""

import json
import re
from typing import Dict, Any, List, Optional, Callable

from .findings import FindingsAccumulator


SYNTHESIS_SYSTEM_PROMPT = """You are a senior data scientist writing a concise analysis report.

Given the investigation findings, synthesize a clear, evidence-based answer to the user's question.

STRUCTURE (use markdown):
1. **Executive Summary** (2-3 sentences answering the question directly)
2. **Key Findings** (bullet points with evidence references)
3. **Supporting Evidence** (specific metrics, correlations, patterns)
4. **Visualizations** (mention any plots/charts generated, with file paths)
5. **Limitations & Caveats** (what we didn't investigate, caveats)
6. **Recommendations** (actionable next steps)

RULES:
- Lead with the answer, then show evidence
- Use specific numbers (not "high correlation" but "r=0.72")
- Mention generated files/plots so user can find them
- Be honest about confidence levels
- Keep it under 500 words unless complex analysis warrants more
- Use markdown formatting (headers, bullets, bold for emphasis)
- Do NOT wrap your response in code fences (``` or ```markdown) β€” output raw markdown directly
- ONLY report findings from SUCCESSFUL investigation steps
- Do NOT invent numbers, statistics, or insights that are not present in the findings
- If a step is marked [FAILED], ignore its results entirely β€” do not fabricate data from it
- If most steps failed, be transparent about limited evidence and recommend re-running"""

SYNTHESIS_USER_TEMPLATE = """**Original question**: {question}

**Investigation summary**:
{findings_context}

**Generated artifacts**:
{artifacts_summary}

Write the analysis report now. Focus on answering the question with evidence from the investigation."""


class Synthesizer:
    """
    The SYNTHESIZE step of the Reasoning Loop.

    Produces the final answer from accumulated evidence: instead of treating
    the LLM's last conversational turn as the summary, it deliberately builds
    a prompt from the (filtered) findings and asks for a narrative report.

    Usage:
        synthesizer = Synthesizer(llm_caller=orchestrator._llm_text_call)
        report = synthesizer.synthesize(
            findings=findings_accumulator,
            artifacts={"plots": [...], "files": [...]}
        )
    """

    # Precompiled once: leading ```markdown/md/text fence and trailing ``` fence.
    _FENCE_OPEN = re.compile(r'^\s*```(?:markdown|md|text)?\s*\n')
    _FENCE_CLOSE = re.compile(r'\n\s*```\s*$')

    # Substrings in a step's result_summary that suggest a generated artifact.
    _ARTIFACT_MARKERS = ("output_file", "output_path", ".html", ".png")

    def __init__(self, llm_caller: Callable):
        """
        Args:
            llm_caller: Function (system_prompt, user_prompt, max_tokens) -> str
        """
        self.llm_caller = llm_caller

    def synthesize(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize all findings into a coherent answer to a specific question.

        Args:
            findings: Accumulated findings from the reasoning loop
            artifacts: Optional dict of generated artifacts (plots, files, models)
            max_tokens: Max tokens for synthesis response

        Returns:
            Markdown-formatted analysis report
        """
        # Build artifacts summary
        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        # Build findings context — only successful findings get full detail
        findings_context = self._build_filtered_context(findings)

        user_prompt = SYNTHESIS_USER_TEMPLATE.format(
            question=findings.question,
            findings_context=findings_context,
            artifacts_summary=artifacts_summary
        )

        return self._run_synthesis(SYNTHESIS_SYSTEM_PROMPT, user_prompt, max_tokens)

    def synthesize_exploratory(
        self,
        findings: FindingsAccumulator,
        artifacts: Optional[Dict[str, Any]] = None,
        max_tokens: int = 3000
    ) -> str:
        """
        Synthesize findings from exploratory analysis (no specific question).

        Uses a different prompt that focuses on discovering patterns
        rather than answering a specific question.
        """
        exploratory_system = """You are a senior data scientist presenting exploratory analysis results.

The user asked for a general analysis. Present the most interesting discoveries.

STRUCTURE (use markdown):
1. **Dataset Overview** (size, structure, key characteristics)
2. **Most Interesting Discoveries** (ranked by insight value)
3. **Key Patterns & Relationships** (correlations, distributions, trends)
4. **Data Quality Notes** (missing data, outliers, issues found)
5. **Visualizations Generated** (list with descriptions)
6. **Recommended Next Steps** (what to investigate deeper)

RULES:
- Lead with the most surprising/important finding
- Use specific numbers and metrics
- Mention all generated visualizations with file paths
- Suggest actionable next analysis steps
- Keep it engaging but data-driven
- Do NOT wrap your response in code fences (``` or ```markdown) — output raw markdown directly
- ONLY report findings from SUCCESSFUL investigation steps
- Do NOT invent numbers or statistics not present in the findings
- If a step is marked [FAILED], ignore it entirely"""

        artifacts_summary = self._format_artifacts(artifacts or {}, findings)

        # Build filtered context — only successful findings
        findings_context = self._build_filtered_context(findings)

        user_prompt = f"""**Analysis request**: {findings.question}

**Investigation summary**:
{findings_context}

**Generated artifacts**:
{artifacts_summary}

Write the exploratory analysis report."""

        return self._run_synthesis(exploratory_system, user_prompt, max_tokens)

    def _run_synthesis(self, system_prompt: str, user_prompt: str, max_tokens: int) -> str:
        """Shared tail of both entry points: call the LLM and clean its output."""
        response = self.llm_caller(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            max_tokens=max_tokens
        )
        return self._strip_code_fences(response.strip())

    def _strip_code_fences(self, text: str) -> str:
        """
        Remove wrapping code fences from LLM output.

        LLMs sometimes wrap markdown in ```markdown ... ``` which causes
        ReactMarkdown to render the entire response as a code block
        instead of parsing the markdown.
        """
        # Strip leading ```markdown or ``` and trailing ``` (patterns are
        # precompiled as class constants).
        stripped = self._FENCE_OPEN.sub('', text)
        stripped = self._FENCE_CLOSE.sub('', stripped)
        return stripped.strip()

    def _format_artifacts(self, artifacts: Dict[str, Any], findings: FindingsAccumulator) -> str:
        """Format artifacts (plots, files, tool usage) for the synthesis prompt."""
        parts = []

        # Plots may be dicts (title/url/path) or plain strings.
        plots = artifacts.get("plots", [])
        if plots:
            parts.append("**Plots generated**:")
            for plot in plots:
                if isinstance(plot, dict):
                    parts.append(f"  - {plot.get('title', 'Plot')}: {plot.get('url', plot.get('path', 'N/A'))}")
                else:
                    parts.append(f"  - {plot}")

        files = artifacts.get("files", [])
        if files:
            parts.append("**Output files**:")
            for f in files:
                parts.append(f"  - {f}")

        # Extract from findings history — only from successful steps.
        # NOTE(review): `marker in result` is a substring test if result_summary
        # is a str, but a key test if it is a dict — confirm the intended type.
        for finding in findings.findings:
            if not finding.success:
                continue
            result = finding.result_summary
            if any(marker in result for marker in self._ARTIFACT_MARKERS):
                parts.append(f"  - Step {finding.iteration} ({finding.action}): output in result")

        # Tools used summary
        if findings.tools_used:
            parts.append(f"\n**Tools used**: {', '.join(findings.tools_used)}")

        if not parts:
            return "No artifacts generated yet."

        return "\n".join(parts)

    def _build_filtered_context(self, findings: FindingsAccumulator) -> str:
        """
        Build synthesis context that only includes SUCCESSFUL findings in detail.
        Failed findings are listed as a brief summary so the LLM knows they happened
        but cannot hallucinate data from them.
        """
        parts = []
        parts.append(f"**Original question**: {findings.question}")
        parts.append(f"**Mode**: {findings.mode}")

        successful = findings.get_successful_findings()
        failed = [f for f in findings.findings if not f.success]

        parts.append(f"**Total iterations**: {len(findings.findings)} ({len(successful)} succeeded, {len(failed)} failed)")
        parts.append(f"**Tools used**: {', '.join(findings.tools_used)}")

        # Only successful findings get full detail.
        if successful:
            parts.append("\n## Successful Investigation Steps\n")
            for f in successful:
                parts.append(
                    f"### Step {f.iteration}: {f.action}\n"
                    f"**Hypothesis**: {f.hypothesis}\n"
                    # default=str so non-JSON-serializable argument values
                    # (paths, dataframes, ...) degrade to their repr.
                    f"**Arguments**: {json.dumps(f.arguments, default=str)}\n"
                    f"**Result**: {f.result_summary}\n"
                    f"**Interpretation**: {f.interpretation}\n"
                    f"**Confidence**: {f.confidence:.0%}\n"
                )

        # Failed findings get just a one-line mention.
        if failed:
            parts.append("\n## Failed Steps (no usable data — do NOT cite these)\n")
            for f in failed:
                parts.append(f"- Step {f.iteration}: `{f.action}` FAILED — {f.error_message or 'execution error'}")

        # Hypothesis outcomes, rendered with a status emoji per hypothesis.
        if findings.hypotheses:
            parts.append("\n## Hypothesis Outcomes\n")
            for h in findings.hypotheses:
                status_emoji = {
                    "supported": "\u2705",
                    "refuted": "\u274c",
                    "inconclusive": "\u2753",
                    "testing": "\ud83d\udd04",
                    "untested": "\u2b1c"
                }.get(h.status, "\u2b1c")
                parts.append(f"{status_emoji} **{h.text}** \u2192 {h.status}")
                if h.evidence_for:
                    parts.append(f"  Evidence for: {'; '.join(h.evidence_for)}")
                if h.evidence_against:
                    parts.append(f"  Evidence against: {'; '.join(h.evidence_against)}")

        return "\n".join(parts)