File size: 18,286 Bytes
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceafbec
 
6b2c068
 
 
 
 
 
 
 
 
 
 
a559920
 
 
 
 
 
 
 
ceafbec
a559920
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceafbec
a559920
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceafbec
2bfc8fd
a559920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b2c068
 
a559920
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
from copy import deepcopy
from agents import Runner, Agent, trace, gen_trace_id
from dotenv import load_dotenv
from prompts.agent_prompts import *
from utils import *
from tools.serper_tool import serper_search
from tools.playwright_tool import playwright_web_read
import os
import pdb
import json

# Read settings from a local .env file before anything looks at the
# environment; override=True is passed so .env values replace variables
# that are already set in the process.
load_dotenv(override=True)

# Model identifier handed to every Agent built in this module.
# Resolves to None when DEFAULT_MODEL_NAME is not set.
default_model_name = os.getenv('DEFAULT_MODEL_NAME')

class SectionResearchManager:
    """Drive the multi-agent research pipeline for one report section.

    Stages, in order: complexity assessment, query generation, web research,
    analysis, an optional critic review that may trigger a single gap-filling
    research round, and a final editor pass.
    """

    # Cap on critic-suggested gap queries used in the follow-up research round.
    MAX_GAP_QUERIES = 5

    # Accent colour per section id, used to tell sections apart in progress output.
    SECTION_COLORS = {
        "landscape": "#3498db", "product_categories": "#9b59b6", "tech_stack": "#e74c3c",
        "research_frontier": "#f39c12", "market_signals": "#1abc9c", "unmet_needs": "#e67e22",
        "opportunity_theses": "#2ecc71", "problem_pain": "#3498db", "buyer_budget_owner": "#9b59b6",
        "roi_story": "#e74c3c", "defensibility": "#f39c12", "comp_landscape": "#1abc9c",
        "gtm_channels": "#e67e22", "risks": "#e74c3c",
    }
    DEFAULT_SECTION_COLOR = "#95a5a6"

    def __init__(self, section_name: str, enable_critic: bool = True) -> None:
        """Create the agents used by the pipeline.

        Args:
            section_name: Label appended to every agent's name.
            enable_critic: When False, the critic review and the gap-filling
                iteration are skipped.
        """
        self.section_name = section_name
        self.enable_critic = enable_critic

        self.complexity_agent = Agent(
            name=f"Complexity Agent: {section_name}",
            instructions=complexity_agent_system_prompt,
            model=default_model_name
        )
        self.query_gen_agent = Agent(
            name=f"Query Gen Agent: {section_name}",
            instructions=query_gen_agent_system_prompt,
            model=default_model_name
        )
        self.researcher_agent = Agent(
            name=f"Researcher agent: {section_name}",
            instructions=researcher_agent_system_prompt,
            tools=[serper_search],
            model=default_model_name
        )
        self.analyst_agent = Agent(
            name=f"Analyst agent: {section_name}",
            instructions=analyst_agent_system_prompt,
            tools=[playwright_web_read],
            model=default_model_name
        )
        self.critic_agent = Agent(
            name=f"Critic agent: {section_name}",
            instructions=critic_agent_system_prompt,
            model=default_model_name
        )
        self.editor_agent = Agent(
            name=f"Editor agent: {section_name}",
            instructions=editor_agent_system_prompt,
            model=default_model_name
        )

    @staticmethod
    def _parse_agent_json(raw_output, fallback, label):
        """Decode an agent's final output as a JSON object, else return *fallback*.

        The fallback is also used when the output is not a string or when it
        decodes to something other than an object, because every caller reads
        the result with ``.get``.
        """
        if isinstance(raw_output, dict):
            return raw_output
        try:
            parsed = json.loads(raw_output)
        except (json.JSONDecodeError, TypeError) as e:
            print(f"Error parsing {label}: {e}")
            return fallback
        if not isinstance(parsed, dict):
            print(f"Error parsing {label}: expected a JSON object, got {type(parsed).__name__}")
            return fallback
        return parsed

    @staticmethod
    def _as_list(value):
        """Return *value* when it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _as_confidence(value, default=0.5):
        """Coerce a model-supplied confidence to float, else return *default*."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _index_fact_sources(facts, mapping):
        """Append each fact's source URL under its fact id in *mapping* (in place).

        Entries that are not dicts, or that lack ``fact_id`` / ``source_url``,
        are skipped so one malformed fact cannot abort the section.
        """
        for fact in facts:
            if not isinstance(fact, dict):
                continue
            fact_id = fact.get("fact_id")
            source_url = fact.get("source_url")
            if fact_id is None or source_url is None:
                continue
            mapping.setdefault(fact_id, []).append(source_url)
        return mapping

    @staticmethod
    def _new_facts_only(existing_facts, candidate_facts):
        """Return the candidates whose (entity, claim, source_url) is not yet known.

        Duplicates inside *candidate_facts* are dropped as well; non-dict
        entries are ignored.
        """
        def identity(fact):
            return (
                str(fact.get("entity", "")),
                str(fact.get("claim", "")),
                str(fact.get("source_url", "")),
            )

        seen = {identity(fact) for fact in existing_facts if isinstance(fact, dict)}
        fresh = []
        for fact in candidate_facts:
            if not isinstance(fact, dict):
                continue
            key = identity(fact)
            if key not in seen:
                seen.add(key)
                fresh.append(fact)
        return fresh

    async def run_section_manager(self, trace_id: str, section_details: Dict, trace_name: str, progress_callback=None) -> Dict:
        """Run every pipeline stage for the section and return the results.

        Args:
            trace_id: Identifier passed to ``trace`` for the whole run.
            section_details: Must contain ``framework``, ``topic_or_idea`` and
                ``section_descriptor`` (which must have a ``section`` key);
                ``run_params`` is optional.
            trace_name: Prefix of the trace's name.
            progress_callback: Optional callable that is awaited with each
                progress message string.

        Returns:
            A dict with ``section``, ``section_brief`` (the editor output),
            ``display_name``, ``section_color`` and ``artifacts`` (the
            intermediate result of every stage).

        Raises:
            KeyError: If a required key is missing from ``section_details``.
        """
        descriptor = section_details["section_descriptor"]
        section = descriptor["section"]
        display_name = descriptor.get("display_name", section.replace("_", " ").title())
        topic = section_details["topic_or_idea"]
        section_color = self.SECTION_COLORS.get(section, self.DEFAULT_SECTION_COLOR)

        async def report(icon, message):
            # Every stage banner shares the same coloured-span wrapper.
            if progress_callback:
                await progress_callback(
                    f'<span style="color: {section_color}; font-weight: bold;">'
                    f'{icon} {display_name} for "{topic}": {message}</span>'
                )

        with trace(f"{trace_name} trace", trace_id=trace_id):
            base_payload = {
                "framework": section_details["framework"],
                "topic_or_idea": topic,
                "section_descriptor": descriptor,
                # An explicit None is treated like a missing key.
                "run_params": section_details.get("run_params") or {},
            }

            # ---------- Step 1: Complexity Assessment ----------
            await report("🧠", "Analyzing research complexity...")
            print(f"[{section}] Running Complexity Assessment")
            complexity_raw = await Runner.run(self.complexity_agent, as_messages(base_payload))
            complexity_result = self._parse_agent_json(
                complexity_raw.final_output,
                {
                    "complexity": "moderate",
                    "reasoning": "fallback due to parsing error",
                    "recommended_query_count": 12,
                    "search_strategy_notes": "standard approach",
                },
                f"complexity JSON for {section}",
            )

            complexity_level = complexity_result.get("complexity", "moderate")
            recommended_count = complexity_result.get("recommended_query_count", 12)
            strategy_notes = complexity_result.get("search_strategy_notes", "")

            print(f"[{section}] Complexity: {complexity_level}, Recommended queries: {recommended_count}")
            await report("🔍", f"Planning {recommended_count} search queries (complexity: {complexity_level})")

            # ---------- Step 2: Query Generation ----------
            query_payload = {
                **base_payload,
                "complexity_level": complexity_level,
                "recommended_query_count": recommended_count,
                "search_strategy_notes": strategy_notes,
            }

            if progress_callback:
                await progress_callback(f"🔍 Generating search queries for **{section}**...")
            print(f"[{section}] Running Query Generation")
            query_gen_raw = await Runner.run(self.query_gen_agent, as_messages(query_payload))
            query_gen_result = self._parse_agent_json(
                query_gen_raw.final_output,
                {"queries": []},
                f"query_gen JSON for {section}",
            )

            queries = self._as_list(query_gen_result.get("queries"))
            print(f"[{section}] Generated {len(queries)} queries (target: {recommended_count})")
            await report("🌐", f"Searching the web with {len(queries)} targeted queries...")

            # The researcher gets the complexity-driven query budget.
            dynamic_run_params = dict(base_payload["run_params"])
            dynamic_run_params["max_queries"] = recommended_count

            # ---------- Step 3: Research ----------
            researcher_payload = {
                **base_payload,
                "queries": queries,
                "run_params": dynamic_run_params,
            }
            print(f"[{section}] Running Researcher")
            researcher_raw = await Runner.run(self.researcher_agent, as_messages(researcher_payload))
            researcher_result = self._parse_agent_json(
                researcher_raw.final_output,
                {"facts": [], "domains_seen": [], "gap_flags": []},
                f"researcher JSON for {section}",
            )
            facts = self._as_list(researcher_result.get("facts"))
            facts_to_url_mapping = self._index_fact_sources(facts, {})

            # ---------- Step 4: Analysis ----------
            await report("🧪", f"Deep-analyzing {len(facts)} facts and sources...")

            analyst_payload = {
                **base_payload,
                "facts": facts,
                "domains_seen": researcher_result.get("domains_seen", []),
                "gap_flags": researcher_result.get("gap_flags", []),
            }
            print(f"[{section}] Running Analyst")
            analyst_raw = await Runner.run(self.analyst_agent, as_messages(analyst_payload))
            analyst_result = self._parse_agent_json(
                analyst_raw.final_output,
                {"section": section, "bullets": [], "mini_takeaways": [], "conflicts": [], "gaps_next": []},
                f"analyst JSON for {section}",
            )

            # ---------- Step 5: Quality Assessment (Critic) ----------
            critic_result = {}
            if self.enable_critic:
                await report("🔬", "Checking quality & identifying gaps...")

                critic_payload = {
                    **base_payload,
                    "facts": facts,
                    "analyst_json": analyst_result,
                }
                print(f"[{section}] Running Quality Assessment (Critic)")
                critic_raw = await Runner.run(self.critic_agent, as_messages(critic_payload))
                critic_result = self._parse_agent_json(
                    critic_raw.final_output,
                    {
                        "needs_iteration": False,
                        "iteration_reason": "JSON parse error",
                        "quality_issues": [],
                        "gap_queries": [],
                        "confidence_assessment": 0.5,
                    },
                    f"critic JSON for {section}",
                )

            # Extract iteration decision from Critic
            needs_iteration = bool(critic_result.get("needs_iteration", False))
            iteration_reason = critic_result.get("iteration_reason", "")
            # Coerced so the :.2f format below cannot fail on a string or null.
            critic_confidence = self._as_confidence(critic_result.get("confidence_assessment", 0.5))
            gap_queries = self._as_list(critic_result.get("gap_queries"))
            # Evaluated once; reused for the loop, the status text and the return value.
            iteration_triggered = bool(self.enable_critic and needs_iteration and gap_queries)

            print(f"[{section}] Critic assessment - Needs iteration: {needs_iteration}, Confidence: {critic_confidence:.2f}")

            # ---------- Step 6: Self-Healing Research Loop (if needed) ----------
            if iteration_triggered:
                # Use Critic's gap queries (already formatted), capped.
                iteration_queries = gap_queries[: self.MAX_GAP_QUERIES]
                await report("🔄", f"Filling knowledge gaps with {len(iteration_queries)} additional searches...")
                print(f"[{section}] Triggering self-healing loop: {iteration_reason}")

                # Second research round
                iteration_payload = {
                    **base_payload,
                    "queries": iteration_queries,
                    "run_params": {**dynamic_run_params, "max_queries": len(iteration_queries)},
                }

                print(f"[{section}] Running iteration research with {len(iteration_queries)} gap queries")
                iteration_researcher_raw = await Runner.run(self.researcher_agent, as_messages(iteration_payload))
                iteration_researcher_result = self._parse_agent_json(
                    iteration_researcher_raw.final_output,
                    {"facts": [], "domains_seen": [], "gap_flags": []},
                    f"iteration researcher JSON for {section}",
                )

                # Merge original and iteration facts, dropping duplicates.
                new_facts = self._new_facts_only(
                    facts, self._as_list(iteration_researcher_result.get("facts"))
                )
                merged_facts = facts + new_facts
                all_domains = (
                    self._as_list(researcher_result.get("domains_seen"))
                    + self._as_list(iteration_researcher_result.get("domains_seen"))
                )
                merged_researcher_result = {
                    **researcher_result,
                    "facts": merged_facts,
                    # dict.fromkeys de-duplicates while keeping first-seen order.
                    "domains_seen": list(dict.fromkeys(all_domains)),
                }

                print(f"[{section}] Merged {len(new_facts)} new facts, total: {len(merged_facts)}")
                await report("🔬", f"Re-analyzing with {len(merged_facts)} facts (+{len(new_facts)} new)...")

                # Re-run analyst with ALL facts (original + iteration facts)
                iteration_analyst_payload = {
                    **base_payload,
                    "facts": merged_facts,
                    "domains_seen": merged_researcher_result.get("domains_seen", []),
                    "gap_flags": merged_researcher_result.get("gap_flags", []),
                }

                print(f"[{section}] Re-running Analyst with expanded facts (total: {len(merged_facts)} facts)")
                iteration_analyst_raw = await Runner.run(self.analyst_agent, as_messages(iteration_analyst_payload))
                # On a parse failure keep the first-round analysis.
                analyst_result = self._parse_agent_json(
                    iteration_analyst_raw.final_output,
                    analyst_result,
                    f"iteration analyst JSON for {section}",
                )

                # Update the final facts and analysis for editor
                print(f"[{section}] Iteration complete - updated facts and analysis ready for Editor")
                researcher_result = merged_researcher_result
                facts = merged_facts
                self._index_fact_sources(new_facts, facts_to_url_mapping)

            # ---------- Step 7: Editor (Always Runs Once at the End) ----------
            await report(
                "✏️",
                f"Finalizing insights ({'enhanced with gap-fill' if iteration_triggered else 'ready'})...",
            )

            editor_payload = {
                **base_payload,
                "analyst_json": analyst_result,  # original or iteration-enhanced
                "facts": facts,  # original or merged facts
                "critic_json": critic_result,  # critic assessment for the editor
            }

            iteration_status = "after iteration" if iteration_triggered else "no iteration"
            print(f"[{section}] Running Editor ({iteration_status})")

            editor_raw = await Runner.run(self.editor_agent, as_messages(editor_payload))
            editor_section = self._parse_agent_json(
                editor_raw.final_output,
                {"section": section, "highlights": [], "facts_ref": [], "gaps_next": [], "confidence": critic_confidence},
                f"editor JSON for {section}",
            )

            # Replace the editor's list of fact ids with an id -> source URLs mapping.
            referenced_ids = editor_section.get("facts_ref")
            if referenced_ids:
                editor_section["facts_ref"] = {
                    fact_id: list(facts_to_url_mapping[fact_id])
                    for fact_id in referenced_ids
                    # Unhashable entries cannot be keys of the mapping; skip them.
                    if isinstance(fact_id, (str, int)) and fact_id in facts_to_url_mapping
                }

            return {
                "section": section,
                "section_brief": editor_section,
                "display_name": display_name,
                "section_color": section_color,
                "artifacts": {
                    "complexity": complexity_result,
                    "queries": query_gen_result,
                    "facts": researcher_result,
                    "analysis": analyst_result,
                    "critic": critic_result,
                    "facts_to_url_mapping": facts_to_url_mapping,
                    "iteration_triggered": iteration_triggered,
                },
            }