MusaR committed on
Commit
86237fa
·
verified ·
1 Parent(s): c3300de

Update research_agent/agent.py

Browse files
Files changed (1) hide show
  1. research_agent/agent.py +386 -116
research_agent/agent.py CHANGED
@@ -1,25 +1,191 @@
1
  import os
2
  import json
3
  import time
 
4
  from datetime import datetime
5
- from typing import List, Dict, Any, Generator
6
  import google.generativeai as genai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def get_clarifying_questions(model, topic: str) -> str:
10
- """Generate clarifying questions for a research topic"""
11
  prompt = f"""
12
- You are a research assistant. Given the topic "{topic}", generate 3-5 clarifying questions that would help create a more focused and comprehensive research report.
13
 
14
- The questions should help understand:
15
- - Specific aspects the user is most interested in
16
- - The intended audience or use case
17
- - The scope and depth required
18
- - Any particular angle or perspective needed
19
- - Timeline or geographical focus if relevant
 
20
 
21
- Format your response as a numbered list of questions.
22
- Be concise but thorough.
23
 
24
  Topic: {topic}
25
  """
@@ -29,173 +195,277 @@ Topic: {topic}
29
  return response.text
30
  except Exception as e:
31
  return f"""
32
- 1. What specific aspects of {topic} would you like me to focus on?
33
- 2. Who is the intended audience for this research report?
34
- 3. Are you looking for recent developments, historical context, or both?
35
- 4. What level of detail would be most helpful for your needs?
36
- 5. Are there any particular perspectives or viewpoints you'd like me to include?
37
-
38
- Please provide your answers to help me create the most relevant research report for you.
39
  """
40
 
41
 
42
- def research_and_plan(config, model, tavily_client, topic: str, clarifications: str) -> Dict[str, Any]:
43
- """Create a research plan based on topic and clarifications"""
44
 
45
- prompt = f"""
46
- You are a research planner. Create a comprehensive research plan for the topic: "{topic}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- User clarifications: {clarifications}
 
49
 
50
- Please provide your response in the following JSON format:
 
 
51
  {{
52
- "detailed_topic": "A refined, specific topic statement based on the clarifications",
53
  "sections": [
54
- {{"title": "Introduction", "description": "Overview and context setting"}},
55
- {{"title": "Background", "description": "Historical context and foundational information"}},
56
- {{"title": "Current State", "description": "Present situation and recent developments"}},
57
- {{"title": "Key Findings", "description": "Main research findings and data"}},
58
- {{"title": "Analysis", "description": "Critical analysis and insights"}},
59
- {{"title": "Future Implications", "description": "Trends and future outlook"}},
60
- {{"title": "Conclusion", "description": "Summary and key takeaways"}}
61
- ],
62
- "research_questions": [
63
- "What is the current state of {topic}?",
64
- "What are the key challenges and opportunities?",
65
- "What are the future implications and trends?"
66
  ]
67
  }}
68
 
69
- Make sure the JSON is valid and properly formatted.
70
  """
71
 
72
  try:
73
- response = model.generate_content(prompt)
74
- response_text = response.text.strip()
 
 
75
 
76
- # Try to extract JSON from the response
 
77
  json_start = response_text.find('{')
78
  json_end = response_text.rfind('}') + 1
79
 
80
  if json_start != -1 and json_end != -1:
81
  json_text = response_text[json_start:json_end]
82
  plan_data = json.loads(json_text)
 
83
  else:
84
- raise ValueError("No valid JSON found in response")
85
 
86
- return plan_data
87
-
88
  except Exception as e:
89
- print(f"Error in research_and_plan: {str(e)}")
90
  # Fallback plan
91
  return {
92
- "detailed_topic": f"Comprehensive Analysis of {topic}",
93
  "sections": [
94
- {"title": "Introduction", "description": "Overview and context"},
95
- {"title": "Background", "description": "Historical context and current state"},
96
- {"title": "Key Findings", "description": "Main research findings and data"},
97
- {"title": "Current Trends", "description": "Recent developments and patterns"},
98
- {"title": "Analysis", "description": "Critical analysis and insights"},
99
- {"title": "Future Outlook", "description": "Predictions and implications"},
100
- {"title": "Conclusion", "description": "Summary and recommendations"}
101
- ],
102
- "research_questions": [
103
- f"What is the current state of {topic}?",
104
- f"What are the key challenges in {topic}?",
105
- f"What are the future implications of {topic}?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  ]
107
  }
108
 
109
 
110
- def write_report_stream(config, model, tavily_client, embedding_model, reranker, plan: Dict[str, Any]) -> Generator[str, None, None]:
111
- """Generate a research report section by section with progress updates"""
112
 
113
- sections = plan.get('sections', [])
114
  detailed_topic = plan.get('detailed_topic', 'Research Topic')
 
115
 
116
- # Start with report header
117
- report_content = f"# {detailed_topic}\n\n"
118
- report_content += f"*Research Report Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"
119
 
120
- yield "Step 1: Initializing research framework"
121
- time.sleep(0.5)
 
122
 
123
- yield "Step 2: Conducting preliminary search"
124
- time.sleep(0.3)
125
 
126
- # Conduct initial broad search
127
- try:
128
- search_results = tavily_client.search(
129
- query=detailed_topic,
130
- max_results=5,
131
- search_depth="advanced"
132
- )
133
- yield f"Found {len(search_results.get('results', []))} initial sources"
134
- except Exception as e:
135
- yield f"Search warning: {str(e)}"
136
- search_results = {"results": []}
137
-
138
- time.sleep(0.3)
139
-
140
- # Process each section
141
  for i, section in enumerate(sections):
142
  section_title = section.get('title', f'Section {i+1}')
143
  section_desc = section.get('description', '')
 
 
 
144
 
145
- yield f"Writing section {i+1}/{len(sections)}: {section_title}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
- # Generate content for this section
148
- section_prompt = f"""
 
 
149
  Write a comprehensive section titled "{section_title}" for a research report on "{detailed_topic}".
150
 
151
- Section focus: {section_desc}
 
 
 
152
 
153
  Requirements:
154
- - Write 3-5 well-structured paragraphs
155
- - Include relevant facts, data, or examples
156
- - Use professional academic writing style
157
- - Ensure content is accurate and informative
158
- - Connect this section logically to the overall topic
 
159
 
160
- Write only the section content without the title (it will be added separately).
 
161
  """
162
 
163
  try:
164
- section_response = model.generate_content(
165
- section_prompt,
166
  generation_config=genai.types.GenerationConfig(
167
- temperature=0.4,
168
- max_output_tokens=1000
169
  )
170
  )
171
- section_content = section_response.text.strip()
172
  except Exception as e:
173
- section_content = f"This section would cover {section_desc.lower()}. Further research would be needed for detailed information."
 
 
 
 
 
174
 
175
  # Add section to report
176
- report_content += f"## {section_title}\n\n{section_content}\n\n"
 
 
 
 
 
 
177
 
178
- # Yield current progress with updated report
179
- yield report_content
180
- time.sleep(0.4)
181
 
182
- # Add sources section
183
- yield "Adding sources and references"
184
 
185
- sources_content = "## Sources and References\n\n"
 
 
186
 
187
- if search_results.get('results'):
188
- for i, result in enumerate(search_results['results'][:5], 1):
189
- title = result.get('title', 'Source')
190
- url = result.get('url', '#')
191
- sources_content += f"{i}. [{title}]({url})\n"
192
- else:
193
- sources_content += "*Note: This report was generated using AI knowledge. For academic use, please verify with current sources.*\n"
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- report_content += sources_content
196
 
197
- yield "Research report completed successfully"
198
- time.sleep(0.2)
199
 
200
  # Final yield with complete report
201
  yield report_content
 
1
  import os
2
  import json
3
  import time
4
+ import re
5
  from datetime import datetime
6
+ from typing import List, Dict, Any, Generator, Tuple
7
  import google.generativeai as genai
8
+ from tavily import TavilyClient
9
+ from sentence_transformers import SentenceTransformer, CrossEncoder
10
+ import numpy as np
11
+ from urllib.parse import urlparse
12
+ import hashlib
13
+
14
+
15
class RAGPipeline:
    """RAG pipeline for document indexing, retrieval and re-ranking.

    Holds chunked research documents plus their embeddings, and answers
    queries with a two-stage search: dense embedding similarity followed
    by cross-encoder re-ranking.
    """

    def __init__(self, embedding_model, reranker):
        # embedding_model: object exposing .encode(texts, ...) -> 2D array
        # reranker: object exposing .predict(list[(query, text)]) -> scores
        self.embedding_model = embedding_model
        self.reranker = reranker
        self.documents = []      # chunk dicts built by index_research
        self.embeddings = None   # 2D array row-aligned with self.documents

    def chunk_text(self, text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
        """Split *text* into overlapping segments, preferring sentence ends."""
        if len(text) <= chunk_size:
            return [text]

        pieces = []
        pos = 0
        total = len(text)
        while pos < total:
            stop = pos + chunk_size
            segment = text[pos:stop]

            # Prefer to cut at a '. ' boundary, but only when that keeps the
            # chunk at least half the nominal size.
            if stop < total:
                boundary = segment.rfind('. ')
                if boundary > chunk_size // 2:
                    stop = pos + boundary + 2
                    segment = text[pos:stop]

            pieces.append(segment.strip())
            pos = stop - overlap  # step back to create the overlap

        return pieces

    def index_research(self, research_items: List[Dict]):
        """Chunk, filter and embed *research_items* for later retrieval."""
        self.documents = []

        for item in research_items:
            body = item.get('content', '')
            url = item.get('url', 'Unknown')
            heading = item.get('title', 'Untitled')

            for idx, piece in enumerate(self.chunk_text(body)):
                # Very short fragments carry little signal; drop them.
                if len(piece.strip()) <= 100:
                    continue
                self.documents.append({
                    'content': piece,
                    'source': url,
                    'title': heading,
                    'chunk_id': idx
                })

        if self.documents:
            # Embed the whole corpus once, aligned with self.documents.
            corpus = [entry['content'] for entry in self.documents]
            self.embeddings = self.embedding_model.encode(corpus, show_progress_bar=False)

    def retrieve_and_rerank(self, query: str, top_k: int = 10) -> List[Dict]:
        """Return up to *top_k* chunks for *query*, re-ranked by the cross-encoder."""
        if not self.documents or self.embeddings is None:
            return []

        # Stage 1: dense retrieval — dot-product similarity over embeddings.
        q_vec = self.embedding_model.encode([query])
        sims = np.dot(q_vec, self.embeddings.T)[0]

        # Keep twice as many candidates as requested for the re-ranker.
        order = np.argsort(sims)[::-1][:top_k * 2]
        shortlist = [self.documents[i] for i in order]

        # Stage 2: cross-encoder scoring of (query, chunk) pairs.
        scores = self.reranker.predict([(query, d['content']) for d in shortlist])

        reranked = []
        for entry, score in zip(shortlist, scores):
            enriched = dict(entry)
            enriched['relevance_score'] = float(score)
            reranked.append(enriched)

        reranked.sort(key=lambda d: d['relevance_score'], reverse=True)
        return reranked[:top_k]
99
+
100
+
101
def gather_research(tavily_client, queries: List[str], max_results_per_query: int = 5) -> List[Dict]:
    """Run each query through the Tavily client and collect unique results.

    Results are de-duplicated by URL across all queries. A query that fails
    is logged and skipped rather than aborting the whole gather, so the
    function always returns whatever it managed to collect.
    """
    collected = []
    visited = set()

    for query in queries:
        try:
            print(f" Searching: {query[:50]}...")
            payload = tavily_client.search(
                query=query,
                max_results=max_results_per_query,
                search_depth="advanced",
                include_answer=True,
                include_raw_content=True
            )

            for hit in payload.get('results', []):
                link = hit.get('url', '')
                if not link or link in visited:
                    continue  # skip empty URLs and cross-query duplicates
                visited.add(link)
                collected.append({
                    'title': hit.get('title', 'Unknown'),
                    'url': link,
                    'content': hit.get('content', ''),
                    'raw_content': hit.get('raw_content', ''),
                    'score': hit.get('score', 0.0),
                    'query': query
                })

            time.sleep(0.5)  # Rate limiting

        except Exception as e:
            # Best-effort: report the failure and move on to the next query.
            print(f" Search error for '{query}': {str(e)}")

    return collected
137
+
138
+
139
def run_verification_step(writer_model, section_text: str, research_context: str) -> str:
    """Fact-check *section_text* against *research_context* with the model.

    Returns the section unchanged when the checker answers VERIFIED — or when
    verification itself fails, since this step is strictly best-effort.
    Otherwise the checker's notes are appended to the section.
    """
    verification_prompt = f"""
You are a fact-checker. Review this section and the research context to identify any potential inaccuracies, unsupported claims, or hallucinations.

SECTION TO VERIFY:
{section_text}

RESEARCH CONTEXT:
{research_context[:3000]}

Check for:
1. Claims not supported by the research
2. Factual inaccuracies
3. Misleading statements
4. Missing context

If the section is accurate and well-supported, respond with "VERIFIED: Section is accurate."
If issues are found, respond with "ISSUES FOUND:" followed by specific problems and suggested corrections.
"""

    try:
        # Low temperature: we want a deterministic, conservative verdict.
        reply = writer_model.generate_content(
            verification_prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.1)
        )
        verdict = reply.text
        if "VERIFIED" in verdict.upper():
            return section_text
        return f"{section_text}\n\n*Verification Note: {verdict}*"
    except Exception:
        # Never block report generation on a failed verification call.
        return section_text
173
 
174
 
175
  def get_clarifying_questions(model, topic: str) -> str:
176
+ """Generate clarifying questions for research focus"""
177
  prompt = f"""
178
+ You are a research strategist. For the topic "{topic}", generate 4-6 specific clarifying questions that will help create a more focused and comprehensive research report.
179
 
180
+ Focus on:
181
+ - Specific aspects or subtopics of interest
182
+ - Target audience and use case
183
+ - Geographical or temporal scope
184
+ - Depth and technical level required
185
+ - Particular perspectives or angles
186
+ - Current vs historical focus
187
 
188
+ Format as numbered questions. Be specific and actionable.
 
189
 
190
  Topic: {topic}
191
  """
 
195
  return response.text
196
  except Exception as e:
197
  return f"""
198
+ 1. What specific aspects of {topic} are you most interested in exploring?
199
+ 2. Who is the intended audience for this research?
200
+ 3. Are you looking for recent developments, historical analysis, or both?
201
+ 4. What geographic regions or markets should be the focus?
202
+ 5. What level of technical detail is appropriate?
203
+ 6. Are there particular challenges or opportunities you want to emphasize?
 
204
  """
205
 
206
 
207
def research_and_plan(config, planner_model, tavily_client, topic: str, clarifications: str) -> Dict[str, Any]:
    """Create a comprehensive research plan with per-section search strategies.

    Steps:
      1. Refine the raw topic into a focused research brief via the planner model.
      2. Run a few broad searches to give the planner real-world context.
      3. Ask the planner model for a JSON section plan (title, description,
         search_queries, key_questions per section).

    Falls back to a generic six-section plan when the model output cannot be
    parsed as JSON.

    Args:
        config: App configuration (currently unused here; kept for interface
            parity with the other pipeline entry points).
        planner_model: Generative model used for the brief and the plan.
        tavily_client: Search client used for the initial context search.
        topic: Raw user-supplied topic.
        clarifications: User answers to the clarifying questions.

    Returns:
        Dict with keys 'detailed_topic' and 'sections'.
    """

    # Step 1: Construct detailed research brief
    brief_prompt = f"""
Based on the initial topic and user clarifications, create a detailed, focused research brief.

Initial Topic: {topic}
User Clarifications: {clarifications}

Create a refined, specific research focus that incorporates the user's requirements. Be precise about scope, angle, and key areas to investigate.

Respond with just the refined research brief (2-3 sentences):
"""

    try:
        response = planner_model.generate_content(brief_prompt)
        detailed_topic = response.text.strip()
    except Exception:
        # Planner unavailable — fall back to a generic refinement.
        detailed_topic = f"Comprehensive analysis of {topic}"

    # Step 2: Initial broad research for context
    print("Conducting initial research for planning...")
    initial_queries = [detailed_topic, f"{topic} overview", f"{topic} recent developments"]
    initial_research = gather_research(tavily_client, initial_queries, 3)

    planning_context = "\n\n".join([
        f"Source: {item['title']}\n{item['content'][:500]}"
        for item in initial_research[:10]
    ])

    # Step 3: Generate detailed section plan
    planning_prompt = f"""
Create a comprehensive research plan for: {detailed_topic}

Research Context:
{planning_context}

Generate 6-8 detailed sections with specific search strategies for each.

Respond in JSON format:
{{
"detailed_topic": "{detailed_topic}",
"sections": [
{{
"title": "Section Title",
"description": "Detailed description of what this section will cover",
"search_queries": ["specific query 1", "specific query 2", "specific query 3"],
"key_questions": ["key question 1", "key question 2"]
}}
]
}}

Make search queries specific and varied to capture different perspectives and sources.
"""

    try:
        response = planner_model.generate_content(
            planning_prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.3)
        )

        # Extract the first-to-last brace span from the response as JSON.
        response_text = response.text.strip()
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        # BUG FIX: rfind() returns -1 when '}' is absent, so json_end is 0 —
        # never -1 — and the old `json_end != -1` test could never fail.
        # Require the closing brace to come after the opening one instead.
        if json_start != -1 and json_end > json_start:
            json_text = response_text[json_start:json_end]
            plan_data = json.loads(json_text)
            return plan_data
        else:
            raise ValueError("No valid JSON found")

    except Exception as e:
        print(f"Planning error: {str(e)}")
        # Fallback plan: a generic but serviceable six-section structure.
        return {
            "detailed_topic": detailed_topic,
            "sections": [
                {
                    "title": "Introduction and Background",
                    "description": "Historical context and foundational overview",
                    "search_queries": [f"{topic} history", f"{topic} background", f"what is {topic}"],
                    "key_questions": [f"What is {topic}?", f"How did {topic} develop?"]
                },
                {
                    "title": "Current State and Recent Developments",
                    "description": "Present situation and latest updates",
                    "search_queries": [f"{topic} 2024", f"{topic} recent news", f"{topic} current trends"],
                    "key_questions": [f"What is the current state of {topic}?", "What are recent developments?"]
                },
                {
                    "title": "Key Players and Market Analysis",
                    "description": "Important organizations, companies, and market dynamics",
                    "search_queries": [f"{topic} companies", f"{topic} market leaders", f"{topic} industry analysis"],
                    "key_questions": ["Who are the key players?", "What is the market structure?"]
                },
                {
                    "title": "Challenges and Opportunities",
                    "description": "Current challenges and future opportunities",
                    "search_queries": [f"{topic} challenges", f"{topic} opportunities", f"{topic} problems"],
                    "key_questions": ["What are the main challenges?", "What opportunities exist?"]
                },
                {
                    "title": "Future Outlook and Trends",
                    "description": "Predictions and emerging trends",
                    "search_queries": [f"{topic} future", f"{topic} predictions", f"{topic} trends 2024"],
                    "key_questions": ["What does the future hold?", "What trends are emerging?"]
                },
                {
                    "title": "Conclusion and Implications",
                    "description": "Summary and broader implications",
                    "search_queries": [f"{topic} implications", f"{topic} impact", f"{topic} summary"],
                    "key_questions": ["What are the key takeaways?", "What are the broader implications?"]
                }
            ]
        }
325
 
326
 
327
def write_report_stream(config, writer_model, tavily_client, embedding_model, reranker, plan: Dict[str, Any]) -> Generator[str, None, None]:
    """Generate a cited research report section by section, yielding progress.

    For each planned section this: searches the web, indexes the hits through
    a RAGPipeline, writes the section with numbered [Source X] citations,
    fact-checks it, and appends it (plus its per-section bibliography) to the
    growing report. Progress messages are yielded along the way; the final
    yield is the complete report text.

    Args:
        config: App configuration; reads DEEP_DIVE_SEARCH_RESULTS,
            CHUNKS_TO_USE_FOR_WRITING and WRITER_TEMPERATURE.
        writer_model: Generative model used for writing and verification.
        tavily_client: Search client passed to gather_research.
        embedding_model: Bi-encoder for the RAG pipeline.
        reranker: Cross-encoder for the RAG pipeline.
        plan: Plan dict from research_and_plan ('detailed_topic', 'sections').

    Yields:
        Markdown progress strings, then the full report as the last item.
    """

    detailed_topic = plan.get('detailed_topic', 'Research Topic')
    sections = plan.get('sections', [])

    # Initialize report state
    report_content = f"# Deep Research Report: {detailed_topic}\n\n"
    report_content += f"*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"

    all_sources = {}        # url -> {'number', 'title', 'url'} for global citations
    citation_counter = 1
    rag_pipeline = RAGPipeline(embedding_model, reranker)

    yield f"🔬 **Starting Deep Research Process**\n\n**Topic:** {detailed_topic}\n**Sections:** {len(sections)}\n\n---\n\n"

    for i, section in enumerate(sections):
        section_title = section.get('title', f'Section {i+1}')
        section_desc = section.get('description', '')
        search_queries = section.get('search_queries', [f"{detailed_topic} {section_title}"])

        yield f"### 📝 Section {i+1}/{len(sections)}: {section_title}\n\n"

        # Gather research for this section.
        # BUG FIX: the progress output below advertised a 3-query limit but the
        # full query list was passed to gather_research; cap the executed
        # queries to match what is displayed.
        queries_to_run = search_queries[:3]  # Limit to 3 queries per section
        yield f"🔍 **Searching web sources...**\n"
        for j, query in enumerate(queries_to_run):
            yield f" → Query {j+1}: `{query}`\n"

        section_research = gather_research(tavily_client, queries_to_run, config.DEEP_DIVE_SEARCH_RESULTS)

        if not section_research:
            yield f"⚠️ No sources found for this section\n\n"
            continue

        yield f"✅ **Found {len(section_research)} sources**\n\n"
        yield f"📚 **Processing and ranking content...**\n"

        # Index and retrieve the most relevant content for this section.
        rag_pipeline.index_research(section_research)
        relevant_chunks = rag_pipeline.retrieve_and_rerank(
            section_desc,
            top_k=config.CHUNKS_TO_USE_FOR_WRITING
        )

        # Build the LLM context, assigning stable global citation numbers.
        context_for_llm = ""
        section_sources = {}

        for chunk in relevant_chunks:
            source_url = chunk['source']
            if source_url not in all_sources:
                all_sources[source_url] = {
                    'number': citation_counter,
                    'title': chunk.get('title', 'Unknown Title'),
                    'url': source_url
                }
                citation_counter += 1

            source_num = all_sources[source_url]['number']
            section_sources[source_url] = source_num
            context_for_llm += f"[Source {source_num}] {chunk['content']}\n\n"

        yield f"✍️ **Writing section content...**\n"

        # Generate section content
        writer_prompt = f"""
Write a comprehensive section titled "{section_title}" for a research report on "{detailed_topic}".

Section Description: {section_desc}

Research Context:
{context_for_llm}

Requirements:
- Write 4-6 well-structured paragraphs
- Use information from the provided sources
- Include in-text citations using [Source X] format
- Maintain academic writing style
- Ensure accuracy and relevance
- Connect logically to the overall topic

Write only the section content (without the title - it will be added automatically).
Include proper citations for all claims using the [Source X] format provided in the context.
"""

        try:
            response = writer_model.generate_content(
                writer_prompt,
                generation_config=genai.types.GenerationConfig(
                    temperature=config.WRITER_TEMPERATURE,
                    max_output_tokens=1500
                )
            )
            section_content = response.text.strip()
        except Exception as e:
            # Surface the failure in-place rather than aborting the stream.
            section_content = f"Error generating content: {str(e)}"

        yield f"🔍 **Fact-checking content...**\n"

        # Verification step (best-effort; may append a note to the section).
        verified_content = run_verification_step(writer_model, section_content, context_for_llm[:2000])

        # Add section to report with its own source list.
        section_bibliography = "\n".join([
            f"[{num}] {all_sources[url]['title']} - {url}"
            for url, num in section_sources.items()
        ])

        final_section = f"## {section_title}\n\n{verified_content}\n\n**Section Sources:**\n{section_bibliography}\n\n"
        report_content += final_section

        yield f"✅ **Section {i+1} completed**\n\n---\n\n"

    # Add master bibliography
    yield f"📋 **Compiling final bibliography...**\n"

    master_bibliography = "## Complete Bibliography\n\n"
    for source_data in sorted(all_sources.values(), key=lambda x: x['number']):
        master_bibliography += f"[{source_data['number']}] {source_data['title']}\n {source_data['url']}\n\n"

    report_content += master_bibliography

    # Add methodology section
    methodology = f"""## Research Methodology

This report was generated using a comprehensive research methodology:

1. **Topic Refinement**: Initial topic was refined based on user clarifications
2. **Multi-Query Search**: Each section used 3-5 targeted search queries
3. **Source Gathering**: Collected {len(all_sources)} unique sources using advanced web search
4. **Content Processing**: Documents were chunked and embedded for semantic retrieval
5. **Relevance Ranking**: Used cross-encoder re-ranking for optimal content selection
6. **Citation Integration**: All claims are supported by cited sources
7. **Fact Verification**: Each section underwent verification for accuracy
8. **Quality Assurance**: Final review for coherence and completeness

*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} using AI-powered research pipeline*
"""

    report_content += methodology

    yield f"🎉 **Research Complete!**\n\n**Final Report:**\n- {len(sections)} sections\n- {len(all_sources)} sources cited\n- {len(report_content.split())} words\n\n---\n\n"

    # Final yield with complete report
    yield report_content