AyobamiMichael committed on
Commit
bcf0130
·
verified ·
1 Parent(s): fdf8198

Upload 16 files

Browse files
agents/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents package
3
+ Specialized AI agents for the multi-agent system
4
+ """
5
+
6
+ from .analyst_agent import AnalystAgent
7
+ from .evaluator_agent import EvaluatorAgent
8
+ from .innovator_agent import InnovatorAgent
9
+ from .writer_agent import WriterAgent
10
+
11
+ __all__ = ['AnalystAgent', 'EvaluatorAgent', 'InnovatorAgent', 'WriterAgent']
agents/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (477 Bytes). View file
 
agents/__pycache__/analyst_agent.cpython-310.pyc ADDED
Binary file (8.34 kB). View file
 
agents/__pycache__/evaluator_agent.cpython-310.pyc ADDED
Binary file (10.5 kB). View file
 
agents/__pycache__/innovator_agent.cpython-310.pyc ADDED
Binary file (12.8 kB). View file
 
agents/__pycache__/writer_agent.cpython-310.pyc ADDED
Binary file (16.6 kB). View file
 
agents/analyst_agent.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents/analyst_agent.py
3
+ Extract and analyze research paper content
4
+ """
5
+
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path
10
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from demo_phase1 import BaseAgent, Message, MessageType
13
+ from typing import Dict, Any
14
+ import json
15
+
16
+
17
class AnalystAgent(BaseAgent):
    """
    Analyst Agent - Paper Analysis & Information Extraction

    Role: Extract structured information from research papers
    Personality: Precise, detail-oriented, technical

    Capabilities:
    - Extract paper metadata (title, authors, year)
    - Identify key contributions
    - Analyze methodology
    - Assess novelty
    - Identify gaps and unclear sections
    """

    def __init__(self, message_queue, llm, pdf_reader):
        """
        Args:
            message_queue: Shared queue used by BaseAgent for inter-agent messaging.
            llm: LLM wrapper exposing generate() and generate_structured().
            pdf_reader: PDF helper exposing get_paper_info() and extract_text().
        """
        super().__init__(
            name="analyst",
            role="Paper Analysis & Information Extraction",
            message_queue=message_queue
        )
        self.llm = llm
        self.pdf_reader = pdf_reader

    def process(self, message: Message) -> Dict[str, Any]:
        """
        Process analysis request

        Expected message content:
        {
            'action': 'analyze',
            'paper_path': 'path/to/paper.pdf'
        }

        Returns:
        {
            'title': '...',
            'authors': [...],
            'key_contributions': [...],
            'methodology': {...},
            'results': {...},
            'novelty_assessment': {...},
            'gaps_identified': [...]
        }

        On failure a dict containing an 'error' key is returned instead of raising.
        """
        action = message.content.get('action')

        # This agent only understands 'analyze' requests.
        if action != 'analyze':
            return {'error': f'Unknown action: {action}'}

        paper_path = message.content.get('paper_path')

        if not paper_path:
            return {'error': 'No paper_path provided'}

        print(f"📄 Analyst: Processing paper: {paper_path}")

        try:
            # Extract text from PDF
            paper_info = self.pdf_reader.get_paper_info(paper_path)
            full_text = self.pdf_reader.extract_text(paper_path)

            print(f"✅ Analyst: Extracted {len(full_text)} characters")

            # Analyze paper (only the beginning of the text is sent to keep
            # the LLM prompt within a manageable size).
            analysis = self._analyze_paper(
                full_text=full_text[:10000],  # First 10K chars
                abstract=paper_info.get('abstract', ''),
                metadata=paper_info.get('metadata', {})
            )

            print(f"✅ Analyst: Analysis complete")

            return analysis

        except Exception as e:
            # Errors (bad path, PDF parse failure, LLM failure) are reported
            # to the caller as a result dict rather than raised.
            print(f"❌ Analyst error: {e}")
            return {'error': str(e)}

    def _analyze_paper(
        self,
        full_text: str,
        abstract: str,
        metadata: Dict
    ) -> Dict[str, Any]:
        """Use LLM to extract structured information.

        Builds an extraction prompt from the paper text plus whatever metadata
        is available, asks the LLM for output matching ``schema``, and falls
        back to a zero-filled result dict (with an 'error' key) if the LLM
        call fails.
        """

        print("🧠 Analyst: Calling LLM for analysis...")

        # Build analysis prompt
        prompt = f"""Analyze this research paper and extract key information.

Paper Metadata:
- Title: {metadata.get('title', 'Not found')}
- Author: {metadata.get('author', 'Not found')}
- Pages: {metadata.get('num_pages', 'Unknown')}

Abstract:
{abstract if abstract else 'Abstract not extracted'}

Paper Text (first part):
{full_text}

Extract the following information:
1. **Title**: The paper's title (if not in metadata, extract from text)
2. **Authors**: List of author names
3. **Year**: Publication year if mentioned
4. **Venue**: Conference or journal name if mentioned
5. **Key Contributions**: 3-5 main contributions of this paper
6. **Methodology**: Brief description of the approach/method used
7. **Datasets**: What datasets were used (if any)
8. **Evaluation Metrics**: Metrics used to evaluate (if mentioned)
9. **Main Results**: Key findings or performance improvements
10. **Limitations**: Any limitations mentioned by authors
11. **Novelty Score**: Rate the novelty from 0-10 with brief reasoning
12. **Gaps**: Any unclear sections or missing information

Be precise and extract only information clearly stated in the paper."""

        # Define expected schema (informal: values describe the expected
        # types; interpretation is up to llm.generate_structured).
        schema = {
            "title": "string",
            "authors": ["string"],
            "year": "number or null",
            "venue": "string or null",
            "key_contributions": ["string"],
            "methodology": {
                "approach": "string",
                "datasets": ["string"],
                "evaluation_metrics": ["string"]
            },
            "main_results": {
                "summary": "string",
                "performance_improvements": ["string"]
            },
            "limitations": ["string"],
            "novelty_assessment": {
                "score": "number (0-10)",
                "reasoning": "string"
            },
            "gaps_identified": ["string"]
        }

        # Call LLM with structured output
        try:
            analysis = self.llm.generate_structured(
                prompt=prompt,
                schema=schema,
                max_tokens=2000,
                temperature=0.3  # Lower for more precise extraction
            )

            print(f"✅ Analyst: LLM analysis successful")

            # Add metadata describing what was actually fed to the LLM,
            # useful for debugging poor extractions.
            analysis['extraction_metadata'] = {
                'source': metadata.get('title', 'Unknown'),
                'pages': metadata.get('num_pages', 0),
                'text_length': len(full_text),
                'abstract_available': bool(abstract)
            }

            return analysis

        except Exception as e:
            print(f"❌ Analyst LLM error: {e}")

            # Return fallback analysis with the same shape as a successful
            # result so downstream agents can still consume it.
            return {
                'title': metadata.get('title', 'Unknown'),
                'authors': [metadata.get('author', 'Unknown')],
                'year': None,
                'venue': None,
                'key_contributions': ['Could not extract - LLM error'],
                'methodology': {
                    'approach': 'Could not extract',
                    'datasets': [],
                    'evaluation_metrics': []
                },
                'main_results': {
                    'summary': 'Could not extract',
                    'performance_improvements': []
                },
                'limitations': [],
                'novelty_assessment': {
                    'score': 0,
                    'reasoning': f'Analysis failed: {str(e)}'
                },
                'gaps_identified': [f'LLM analysis error: {str(e)}'],
                'error': str(e)
            }

    def quick_summary(self, paper_path: str) -> str:
        """Generate a quick one-paragraph summary.

        Reads up to the first 5000 characters of the paper and asks the LLM
        for a 3-5 sentence summary. Returns an error message string (never
        raises) on failure.
        """

        print(f"📝 Analyst: Generating quick summary for {paper_path}")

        try:
            paper_info = self.pdf_reader.get_paper_info(paper_path)
            text_sample = self.pdf_reader.extract_text(paper_path)[:5000]

            prompt = f"""Provide a concise one-paragraph summary of this research paper.

Title: {paper_info.get('metadata', {}).get('title', 'Unknown')}

Text:
{text_sample}


Summary (1 paragraph, 3-5 sentences):"""

            summary = self.llm.generate(
                prompt=prompt,
                max_tokens=200,
                temperature=0.5
            )

            return summary.strip()

        except Exception as e:
            return f"Could not generate summary: {str(e)}"

    def identify_research_gaps(self, analysis: Dict[str, Any]) -> list:
        """Identify potential research gaps based on analysis.

        Args:
            analysis: A result dict produced by process()/_analyze_paper().

        Returns:
            A list of non-empty response lines from the LLM (NOTE: the raw
            split may include heading lines, not only the gap items), or a
            single-element error list on failure.
        """

        print("🔍 Analyst: Identifying research gaps...")

        prompt = f"""Based on this paper analysis, identify 3-5 potential research gaps or future directions:

Key Contributions:
{json.dumps(analysis.get('key_contributions', []), indent=2)}

Methodology:
{json.dumps(analysis.get('methodology', {}), indent=2)}

Limitations:
{json.dumps(analysis.get('limitations', []), indent=2)}

Identify:
1. What questions remain unanswered?
2. What extensions could be explored?
3. What weaknesses could be addressed?
4. What new applications could be investigated?

Provide 3-5 concrete research gaps."""

        try:
            response = self.llm.generate(
                prompt=prompt,
                max_tokens=500,
                temperature=0.7
            )

            # Parse into list (one entry per non-blank line)
            gaps = [line.strip() for line in response.split('\n') if line.strip()]
            return gaps

        except Exception as e:
            print(f"❌ Gap identification error: {e}")
            return ["Could not identify gaps due to error"]
280
+
281
+
282
+ # ==================== DEMO ====================
283
+
284
def demo_analyst():
    """Interactive demonstration of the Analyst Agent.

    Wires the agent to live LLM/PDF dependencies, then (optionally)
    analyzes a user-supplied PDF and prints the structured result.
    """

    sep = "=" * 60
    print(sep)
    print("📊 ANALYST AGENT DEMO")
    print(sep)
    print()

    # Wire up the concrete dependencies the agent needs.
    from llm_wrapper import LLMWrapper
    from pdf_reader import PDFReader
    from demo_phase1 import MessageQueue

    language_model = LLMWrapper(model='fast')
    reader = PDFReader()
    bus = MessageQueue()

    # Build the agent under test.
    agent = AnalystAgent(bus, language_model, reader)

    print(f"✅ {agent.name} initialized")
    print(f" Role: {agent.role}")
    print()

    # Let the user point at a paper; an empty answer skips the live run.
    print("📄 To test, provide path to a PDF research paper:")
    pdf_path = input("Enter path (or press Enter to skip): ").strip()

    if not pdf_path:
        print("⏭️ Skipping test (no paper provided)")
    else:
        # Package the request exactly as an upstream agent would.
        request = Message(
            sender="tester",
            recipient="analyst",
            message_type=MessageType.REQUEST,
            content={
                'action': 'analyze',
                'paper_path': pdf_path
            }
        )

        # Run the analysis end to end.
        analysis = agent.process(request)

        print("\n" + sep)
        print("📊 ANALYSIS RESULT")
        print(sep)
        print(json.dumps(analysis, indent=2))

    print("\n✅ Demo complete!")
336
+
337
+
338
+ if __name__ == "__main__":
339
+ demo_analyst()
agents/evaluator_agent.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents/evaluator_agent.py
3
+ Assess paper quality and funding potential
4
+ """
5
+
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path
10
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from demo_phase1 import BaseAgent, Message, MessageType
13
+ from typing import Dict, Any
14
+ import json
15
+
16
+
17
class EvaluatorAgent(BaseAgent):
    """
    Evaluator Agent - Paper Quality Assessment & Review

    Role: Assess paper quality and impact potential
    Personality: Critical but fair, peer-review style

    Capabilities:
    - Score originality, methodology, and impact
    - Identify weaknesses in approach
    - Assess funding potential
    - Generate reviewer-style feedback
    - Compare to state-of-the-art
    """

    def __init__(self, message_queue, llm):
        """
        Args:
            message_queue: Shared queue used by BaseAgent for inter-agent messaging.
            llm: LLM wrapper exposing generate() and generate_structured().
        """
        super().__init__(
            name="evaluator",
            role="Paper Quality Assessment & Review",
            message_queue=message_queue
        )
        self.llm = llm

    def process(self, message: Message) -> Dict[str, Any]:
        """
        Process evaluation request

        Expected message content:
        {
            'action': 'evaluate',
            'analysis': {... analyst output ...}
        }

        Returns:
        {
            'scores': {
                'originality': 0-10,
                'methodology': 0-10,
                'impact': 0-10,
                'clarity': 0-10,
                'overall': 0-10
            },
            'funding_potential': 'HIGH' | 'MEDIUM' | 'LOW',
            'strengths': [...],
            'weaknesses': [...],
            'reviewer_feedback': [...],
            'recommendations': {...}
        }

        On failure a dict containing an 'error' key is returned instead of raising.
        """
        action = message.content.get('action')

        # This agent only understands 'evaluate' requests.
        if action != 'evaluate':
            return {'error': f'Unknown action: {action}'}

        analysis = message.content.get('analysis')

        if not analysis:
            return {'error': 'No analysis provided'}

        print(f"⚖️ Evaluator: Assessing paper quality...")

        try:
            # Evaluate the paper
            evaluation = self._evaluate_paper(analysis)

            print(f"✅ Evaluator: Evaluation complete")
            print(f" Overall Score: {evaluation.get('scores', {}).get('overall', 0)}/10")
            print(f" Funding Potential: {evaluation.get('funding_potential', 'UNKNOWN')}")

            return evaluation

        except Exception as e:
            # Errors are reported to the caller rather than raised.
            print(f"❌ Evaluator error: {e}")
            return {'error': str(e)}

    def _evaluate_paper(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Use LLM to evaluate paper quality.

        Prompts the LLM for a peer-review-style assessment matching
        ``schema``, clamps any out-of-range scores to [0, 10], and falls
        back to a zero-scored result dict (with an 'error' key) if the
        LLM call or score validation fails.
        """

        print("🧠 Evaluator: Calling LLM for evaluation...")

        # Build evaluation prompt
        prompt = f"""You are a peer reviewer evaluating this research paper. Provide a thorough assessment.

PAPER ANALYSIS:
{json.dumps(analysis, indent=2)}

Evaluate the paper on these dimensions:

1. **Originality** (0-10): How novel is this work?
   - Are the ideas new?
   - Does it advance the field?
   - Is it incremental or groundbreaking?

2. **Methodology** (0-10): How sound is the approach?
   - Is the method well-designed?
   - Are experiments rigorous?
   - Are datasets appropriate?
   - Are comparisons fair?

3. **Impact** (0-10): What is the potential impact?
   - Will this influence future research?
   - Are there practical applications?
   - Is it significant for the community?

4. **Clarity** (0-10): How well is it presented?
   - Is the writing clear?
   - Are results well-explained?
   - Is it reproducible?

5. **Overall** (0-10): Overall quality assessment

Additionally provide:
- **Funding Potential**: HIGH / MEDIUM / LOW (would this get funded?)
- **Strengths**: 3-5 key strengths
- **Weaknesses**: 3-5 key weaknesses or concerns
- **Reviewer Feedback**: 3-5 critical comments (peer-review style)
- **Recommendations**: What needs improvement for acceptance/funding?

Be critical but constructive. Think like a senior researcher reviewing for a top conference."""

        # Define expected schema (informal: values describe the expected types)
        schema = {
            "scores": {
                "originality": "number (0-10)",
                "methodology": "number (0-10)",
                "impact": "number (0-10)",
                "clarity": "number (0-10)",
                "overall": "number (0-10)"
            },
            "funding_potential": "HIGH | MEDIUM | LOW",
            "strengths": ["string"],
            "weaknesses": ["string"],
            "reviewer_feedback": ["string"],
            "recommendations": {
                "for_publication": ["string"],
                "for_funding": ["string"],
                "future_work": ["string"]
            },
            "decision_reasoning": "string"
        }

        # Call LLM
        try:
            evaluation = self.llm.generate_structured(
                prompt=prompt,
                schema=schema,
                max_tokens=2000,
                temperature=0.4  # Balanced for thoughtful evaluation
            )

            print(f"✅ Evaluator: LLM evaluation successful")

            # Validate scores are in range; clamp anything the LLM returned
            # outside [0, 10]. A malformed response (missing 'scores') will
            # raise here and be handled by the fallback below.
            for score_name, score in evaluation['scores'].items():
                if not (0 <= score <= 10):
                    print(f"⚠️ Warning: {score_name} score out of range: {score}")
                    evaluation['scores'][score_name] = max(0, min(10, score))

            return evaluation

        except Exception as e:
            print(f"❌ Evaluator LLM error: {e}")

            # Return fallback evaluation with the same shape as a successful
            # result so downstream consumers can still use it.
            return {
                'scores': {
                    'originality': 0,
                    'methodology': 0,
                    'impact': 0,
                    'clarity': 0,
                    'overall': 0
                },
                'funding_potential': 'UNKNOWN',
                'strengths': [],
                'weaknesses': [f'Evaluation failed: {str(e)}'],
                'reviewer_feedback': [f'Could not complete evaluation: {str(e)}'],
                'recommendations': {
                    'for_publication': [],
                    'for_funding': [],
                    'future_work': []
                },
                'decision_reasoning': f'Evaluation error: {str(e)}',
                'error': str(e)
            }

    def compare_to_baseline(
        self,
        analysis: Dict[str, Any],
        baseline_description: str
    ) -> Dict[str, Any]:
        """Compare paper to a baseline or state-of-the-art.

        Args:
            analysis: Analyst result dict (only 'main_results' is used).
            baseline_description: Free-text description of the baseline.

        Returns:
            {'comparison_summary': ..., 'baseline': ...} on success, or a
            dict with an 'error' key on failure.
        """

        print("📊 Evaluator: Comparing to baseline...")

        prompt = f"""Compare this paper to the baseline/state-of-the-art:

PAPER RESULTS:
{json.dumps(analysis.get('main_results', {}), indent=2)}

BASELINE:
{baseline_description}

Provide comparison:
1. How does this paper improve over baseline?
2. What are the performance gains?
3. Is the comparison fair?
4. What are the limitations of the comparison?

Be specific about quantitative improvements if mentioned."""

        try:
            comparison = self.llm.generate(
                prompt=prompt,
                max_tokens=500,
                temperature=0.5
            )

            return {
                'comparison_summary': comparison,
                'baseline': baseline_description
            }

        except Exception as e:
            return {
                'comparison_summary': f'Comparison failed: {str(e)}',
                'error': str(e)
            }

    def assess_reproducibility(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Assess how reproducible the work is.

        Only the 'methodology' portion of the analysis is considered.
        Returns a schema-shaped dict; on failure the score is 0 and the
        'error' key is set.
        """

        print("🔬 Evaluator: Assessing reproducibility...")

        methodology = analysis.get('methodology', {})

        prompt = f"""Assess the reproducibility of this research:

METHODOLOGY:
{json.dumps(methodology, indent=2)}

DATASETS: {methodology.get('datasets', [])}
EVALUATION METRICS: {methodology.get('evaluation_metrics', [])}

Rate reproducibility (0-10) and identify:
1. What information is provided?
2. What is missing for reproduction?
3. Are code/data available? (if mentioned)
4. Can someone else replicate this?

Provide:
- reproducibility_score (0-10)
- available_resources (list)
- missing_information (list)
- reproducibility_notes (string)"""

        schema = {
            "reproducibility_score": "number (0-10)",
            "available_resources": ["string"],
            "missing_information": ["string"],
            "reproducibility_notes": "string"
        }

        try:
            assessment = self.llm.generate_structured(
                prompt=prompt,
                schema=schema,
                temperature=0.3
            )

            return assessment

        except Exception as e:
            return {
                'reproducibility_score': 0,
                'available_resources': [],
                'missing_information': [f'Assessment error: {str(e)}'],
                'reproducibility_notes': 'Could not assess',
                'error': str(e)
            }

    def generate_review_summary(self, evaluation: Dict[str, Any]) -> str:
        """Generate a concise, human-readable review summary.

        Purely formats an evaluation dict (as returned by process());
        performs no LLM calls and never raises on missing keys thanks to
        the .get() defaults.
        """

        scores = evaluation.get('scores', {})
        funding = evaluation.get('funding_potential', 'UNKNOWN')

        summary = f"""REVIEW SUMMARY
{"="*50}

Overall Score: {scores.get('overall', 0)}/10
Funding Potential: {funding}

Scores:
- Originality: {scores.get('originality', 0)}/10
- Methodology: {scores.get('methodology', 0)}/10
- Impact: {scores.get('impact', 0)}/10
- Clarity: {scores.get('clarity', 0)}/10

STRENGTHS:
"""

        # Numbered lists for strengths and weaknesses.
        for i, strength in enumerate(evaluation.get('strengths', []), 1):
            summary += f"{i}. {strength}\n"

        summary += "\nWEAKNESSES:\n"
        for i, weakness in enumerate(evaluation.get('weaknesses', []), 1):
            summary += f"{i}. {weakness}\n"

        summary += f"\nDECISION: {evaluation.get('decision_reasoning', 'N/A')}"

        return summary
328
+
329
+
330
+ # ==================== DEMO ====================
331
+
332
def demo_evaluator():
    """Interactive demonstration of the Evaluator Agent.

    Feeds a canned analyst result (the Transformer paper) through the
    evaluator and prints both the formatted summary and the raw JSON.
    """

    sep = "=" * 60
    print(sep)
    print("⚖️ EVALUATOR AGENT DEMO")
    print(sep)
    print()

    # Wire up the concrete dependencies the agent needs.
    from llm_wrapper import LLMWrapper
    from demo_phase1 import MessageQueue

    language_model = LLMWrapper(model='fast')
    bus = MessageQueue()

    # Build the agent under test.
    reviewer = EvaluatorAgent(bus, language_model)

    print(f"✅ {reviewer.name} initialized")
    print(f" Role: {reviewer.role}")
    print()

    # Canned analyst output describing the Transformer paper.
    sample_analysis = {
        'title': 'Attention Is All You Need',
        'authors': ['Vaswani et al.'],
        'key_contributions': [
            'Introduced Transformer architecture',
            'Replaced RNNs with self-attention',
            'Achieved state-of-the-art on translation'
        ],
        'methodology': {
            'approach': 'Transformer neural network with multi-head attention',
            'datasets': ['WMT 2014 English-German', 'WMT 2014 English-French'],
            'evaluation_metrics': ['BLEU score', 'Training time']
        },
        'main_results': {
            'summary': 'Best BLEU score on translation tasks',
            'performance_improvements': [
                '28.4 BLEU on WMT 2014 English-German',
                'Trained in fraction of time vs RNN models'
            ]
        },
        'limitations': [
            'Memory intensive for very long sequences',
            'Less interpretable than RNNs'
        ],
        'novelty_assessment': {
            'score': 9,
            'reasoning': 'Revolutionary architecture that changed NLP'
        }
    }

    # Package the request exactly as the analyst would send it.
    request = Message(
        sender="analyst",
        recipient="evaluator",
        message_type=MessageType.REQUEST,
        content={
            'action': 'evaluate',
            'analysis': sample_analysis
        }
    )

    print("🧪 Testing with mock Transformer paper analysis...")
    print()

    # Run the evaluation end to end.
    outcome = reviewer.process(request)

    print("\n" + sep)
    print("⚖️ EVALUATION RESULT")
    print(sep)

    # Show summary (or the error if the evaluation failed).
    if 'error' in outcome:
        print(f"❌ Error: {outcome['error']}")
    else:
        print(reviewer.generate_review_summary(outcome))

    print("\n" + sep)
    print("📋 FULL EVALUATION (JSON)")
    print(sep)
    print(json.dumps(outcome, indent=2))

    print("\n✅ Demo complete!")
420
+
421
+
422
+ if __name__ == "__main__":
423
+ demo_evaluator()
agents/innovator_agent.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ agents/innovator_agent.py
4
+ Generate novel research directions and extensions
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+
11
+ from demo_phase1 import BaseAgent, Message, MessageType
12
+ from typing import Dict, Any
13
+ import json
14
+
15
+ class InnovatorAgent(BaseAgent):
16
+ """
17
+ Innovator Agent - Creative Research Extension
18
+
19
+ Role: Identify future directions & applications
20
+ Personality: Visionary, creative, forward-thinking
21
+
22
+ Capabilities:
23
+ - Generate 3-5 novel research directions
24
+ - Identify potential industry applications
25
+ - Propose extensions to current work
26
+ - Suggest cross-disciplinary connections
27
+ - Create "what if" scenarios
28
+ - Assess commercial potential
29
+ """
30
+
31
+ def __init__(self, message_queue, llm):
32
+ super().__init__(
33
+ name="innovator",
34
+ role="Creative Research Extension & Future Directions",
35
+ message_queue=message_queue
36
+ )
37
+ self.llm = llm
38
+
39
+ def process(self, message: Message) -> Dict[str, Any]:
40
+ """
41
+ Process innovation request
42
+
43
+ Expected message content:
44
+ {
45
+ 'action': 'innovate',
46
+ 'analysis': {...}, # From analyst
47
+ 'evaluation': {...} # From evaluator
48
+ }
49
+
50
+ Returns:
51
+ {
52
+ 'future_directions': [...],
53
+ 'industry_applications': [...],
54
+ 'extensions': [...],
55
+ 'cross_disciplinary': [...],
56
+ 'commercial_potential': 'HIGH' | 'MEDIUM' | 'LOW',
57
+ 'ten_year_vision': '...',
58
+ 'breakthrough_potential': {...}
59
+ }
60
+ """
61
+ action = message.content.get('action')
62
+
63
+ if action != 'innovate':
64
+ return {'error': f'Unknown action: {action}'}
65
+
66
+ analysis = message.content.get('analysis')
67
+ evaluation = message.content.get('evaluation')
68
+
69
+ if not analysis:
70
+ return {'error': 'No analysis provided'}
71
+
72
+ print(f"💡 Innovator: Generating future directions...")
73
+
74
+ try:
75
+ # Generate innovations
76
+ innovations = self._generate_innovations(analysis, evaluation)
77
+
78
+ print(f"✅ Innovator: Generated {len(innovations.get('future_directions', []))} future directions")
79
+ print(f" Commercial Potential: {innovations.get('commercial_potential', 'N/A')}")
80
+
81
+ return innovations
82
+
83
+ except Exception as e:
84
+ print(f"❌ Innovator error: {e}")
85
+ return {'error': str(e)}
86
+
87
+ def _generate_innovations(
88
+ self,
89
+ analysis: Dict[str, Any],
90
+ evaluation: Dict[str, Any] = None
91
+ ) -> Dict[str, Any]:
92
+ """Use LLM to generate innovative directions"""
93
+
94
+ print("🧠 Innovator: Calling LLM for creative ideation...")
95
+
96
+ # Build innovation prompt
97
+ prompt = f"""You are a visionary research innovator. Based on this paper analysis, generate creative future directions.
98
+
99
+ PAPER ANALYSIS:
100
+ Title: {analysis.get('title', 'Unknown')}
101
+ Key Contributions: {json.dumps(analysis.get('key_contributions', []), indent=2)}
102
+ Methodology: {json.dumps(analysis.get('methodology', {}), indent=2)}
103
+ Results: {json.dumps(analysis.get('main_results', {}), indent=2)}
104
+ Limitations: {json.dumps(analysis.get('limitations', []), indent=2)}
105
+ Gaps: {json.dumps(analysis.get('gaps_identified', []), indent=2)}
106
+
107
+ Generate innovative extensions and directions:
108
+
109
+
110
+ 1. **Future Research Directions** (3-5 specific directions):
111
+ - What are the most promising unexplored areas?
112
+ - What novel variations could be investigated?
113
+ - What fundamental questions remain?
114
+
115
+ 2. **Industry Applications** (3-5 real-world applications):
116
+ - Healthcare, finance, education, manufacturing, etc.
117
+ - Specific use cases with clear value
118
+ - Near-term vs long-term opportunities
119
+
120
+ 3. **Novel Extensions** (3-5 technical extensions):
121
+ - Algorithmic improvements
122
+ - New architectures or approaches
123
+ - Combining with other techniques
124
+ - Scaling to new domains
125
+
126
+ 4. **Cross-Disciplinary Connections** (2-4 connections):
127
+ - How could this intersect with biology, physics, social science, etc.?
128
+ - Unexpected applications in other fields
129
+ - Potential for interdisciplinary breakthroughs
130
+
131
+ 5. **Commercial Potential**: HIGH / MEDIUM / LOW
132
+ - Can this be monetized?
133
+ - Market size and demand
134
+ - Competitive advantages
135
+
136
+ 6. **10-Year Vision**:
137
+ - Where could this research lead in a decade?
138
+ - Transformative potential
139
+ - Societal impact
140
+
141
+ 7. **Breakthrough Potential**:
142
+ - Could this lead to major breakthroughs?
143
+ - Nobel Prize potential? (be honest)
144
+ - Paradigm-shifting capability
145
+
146
+ Be creative, ambitious, and forward-thinking. Think like a visionary researcher who sees beyond current limitations."""
147
+
148
+ # Define expected schema
149
+ schema = {
150
+ "future_directions": [
151
+ {
152
+ "direction": "string (title)",
153
+ "description": "string (2-3 sentences)",
154
+ "feasibility": "HIGH | MEDIUM | LOW",
155
+ "timeframe": "string (1-2 years, 3-5 years, 5-10 years)"
156
+ }
157
+ ],
158
+ "industry_applications": [
159
+ {
160
+ "domain": "string (industry/field)",
161
+ "application": "string (specific use case)",
162
+ "value_proposition": "string",
163
+ "readiness": "string (ready now, 1-2 years, 3-5 years)"
164
+ }
165
+ ],
166
+ "extensions": [
167
+ {
168
+ "extension": "string (title)",
169
+ "description": "string",
170
+ "technical_challenge": "string"
171
+ }
172
+ ],
173
+ "cross_disciplinary": [
174
+ {
175
+ "field": "string",
176
+ "connection": "string",
177
+ "potential": "string"
178
+ }
179
+ ],
180
+ "commercial_potential": "HIGH | MEDIUM | LOW",
181
+ "commercial_reasoning": "string",
182
+ "ten_year_vision": "string (paragraph)",
183
+ "breakthrough_potential": {
184
+ "score": "number (0-10)",
185
+ "reasoning": "string",
186
+ "paradigm_shift": "boolean"
187
+ }
188
+ }
189
+
190
+ # Call LLM
191
+ try:
192
+ innovations = self.llm.generate_structured(
193
+ prompt=prompt,
194
+ schema=schema,
195
+ max_tokens=3000,
196
+ temperature=0.8 # Higher for creativity
197
+ )
198
+
199
+ print(f"✅ Innovator: LLM ideation successful")
200
+
201
+ return innovations
202
+
203
+ except Exception as e:
204
+ print(f"❌ Innovator LLM error: {e}")
205
+
206
+ # Return fallback
207
+ return {
208
+ 'future_directions': [
209
+ {
210
+ 'direction': 'Could not generate',
211
+ 'description': f'Ideation failed: {str(e)}',
212
+ 'feasibility': 'UNKNOWN',
213
+ 'timeframe': 'Unknown'
214
+ }
215
+ ],
216
+ 'industry_applications': [],
217
+ 'extensions': [],
218
+ 'cross_disciplinary': [],
219
+ 'commercial_potential': 'UNKNOWN',
220
+ 'commercial_reasoning': f'Error: {str(e)}',
221
+ 'ten_year_vision': 'Could not generate vision',
222
+ 'breakthrough_potential': {
223
+ 'score': 0,
224
+ 'reasoning': f'Generation failed: {str(e)}',
225
+ 'paradigm_shift': False
226
+ },
227
+ 'error': str(e)
228
+ }
229
+ def generate_what_if_scenarios(self, analysis: Dict[str, Any]) -> list:
230
+ """Generate creative 'what if' scenarios"""
231
+
232
+ print("🔮 Innovator: Generating 'what if' scenarios...")
233
+
234
+ prompt = f"""Based on this research, generate 5 creative "what if" scenarios:
235
+
236
+ Research: {analysis.get('title', 'Unknown')}
237
+ Contributions: {json.dumps(analysis.get('key_contributions', []))}
238
+
239
+ Generate 5 "what if" scenarios exploring:
240
+ 1. What if this technique was 100x faster?
241
+ 2. What if it could handle 1000x more data?
242
+ 3. What if it was combined with [emerging technology]?
243
+ 4. What if the assumptions were changed?
244
+ 5. What if it was applied to [unexpected domain]?
245
+
246
+ Make them specific, creative, and thought-provoking."""
247
+
248
+
249
+ try:
250
+ response = self.llm.generate(
251
+ prompt=prompt,
252
+ max_tokens=800,
253
+ temperature=0.9 # Very creative
254
+ )
255
+
256
+ # Parse scenarios
257
+ scenarios = [s.strip() for s in response.split('\n') if s.strip()]
258
+ return scenarios[:5]
259
+
260
+ except Exception as e:
261
+ print(f"❌ Scenario generation error: {e}")
262
+ return ["Could not generate scenarios due to error"]
263
+
264
+ def assess_funding_opportunities(
265
+ self,
266
+ innovations: Dict[str, Any]
267
+ ) -> Dict[str, Any]:
268
+ """Identify potential funding opportunities"""
269
+
270
+ print("💰 Innovator: Identifying funding opportunities...")
271
+
272
+ prompt = f"""Based on these research innovations, identify funding opportunities:
273
+
274
+ INNOVATIONS:
275
+ {json.dumps(innovations, indent=2)}
276
+
277
+ Identify:
278
+ 1. **Relevant Funding Agencies**:
279
+ - NSF programs (specific)
280
+ - NIH if applicable
281
+ - DARPA if defense-related
282
+ - Private foundations
283
+ - Industry partnerships
284
+
285
+ 2. **Grant Types**:
286
+ - Small grants ($50K-$250K)
287
+ - Medium grants ($250K-$1M)
288
+ - Large grants ($1M+)
289
+
290
+ 3. **Best Fit Programs** (top 3):
291
+ - Program name
292
+ - Why it's a good fit
293
+ - Typical funding amount
294
+
295
+ 4. **Funding Timeline**:
296
+ - When to apply
297
+ - Competition level"""
298
+
299
+ schema = {
300
+ "funding_agencies": ["string"],
301
+ "grant_types": {
302
+ "small_grants": ["string"],
303
+ "medium_grants": ["string"],
304
+ "large_grants": ["string"]
305
+ },
306
+ "best_fit_programs": [
307
+ {
308
+ "program": "string",
309
+ "agency": "string",
310
+ "fit_reasoning": "string",
311
+ "typical_amount": "string"
312
+ }
313
+ ],
314
+ "recommended_timeline": "string"
315
+ }
316
+
317
+ try:
318
+ opportunities = self.llm.generate_structured(
319
+ prompt=prompt,
320
+ schema=schema,
321
+ max_tokens=1500,
322
+ temperature=0.5
323
+ )
324
+
325
+ return opportunities
326
+
327
+ except Exception as e:
328
+ return {
329
+ 'funding_agencies': [],
330
+ 'grant_types': {},
331
+ 'best_fit_programs': [],
332
+ 'recommended_timeline': 'Unknown',
333
+ 'error': str(e)
334
+ }
335
+
336
    def generate_collaboration_network(
        self,
        analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Suggest potential collaborators and interdisciplinary connections.

        Args:
            analysis: Analyst output; 'title' and 'methodology' are
                interpolated into the prompt.

        Returns:
            On success: {'collaboration_suggestions': <prose>, 'generated': True}.
            On failure: {'collaboration_suggestions': 'Could not generate',
                         'error': <message>} — callers can distinguish the two
            by the presence of the 'error' key.
        """

        print("🤝 Innovator: Mapping collaboration opportunities...")

        # Free-form prompt: unlike the structured ideation calls in this
        # agent, this one returns prose, so plain generate() (no schema)
        # is used below.
        prompt = f"""Based on this research, suggest collaboration opportunities:

Research: {analysis.get('title', 'Unknown')}
Field: Based on {json.dumps(analysis.get('methodology', {}))}

Suggest:
1. **Complementary Expertise Needed** (3-5):
   - What skills/knowledge would enhance this?
   - Specific expertise areas

2. **Potential Collaborator Types**:
   - Academic departments
   - Research labs
   - Industry partners
   - Government agencies

3. **Interdisciplinary Opportunities**:
   - Fields to connect with
   - Synergies and benefits

4. **International Collaboration**:
   - Countries/regions with relevant expertise
   - Global research networks"""

        try:
            response = self.llm.generate(
                prompt=prompt,
                max_tokens=1000,
                temperature=0.6
            )

            return {
                'collaboration_suggestions': response,
                'generated': True
            }

        except Exception as e:
            return {
                'collaboration_suggestions': 'Could not generate',
                'error': str(e)
            }
385
+
386
+ # ==================== DEMO ====================
387
+
388
def demo_innovator():
    """Demo the Innovator Agent.

    Runs the agent end-to-end against a hard-coded mock analysis of the
    Transformer paper, printing the generated innovations. Requires a valid
    GROQ_API_KEY (via tools.llm_wrapper.LLMWrapper) to actually call the LLM.
    """

    print("="*60)
    print("💡 INNOVATOR AGENT DEMO")
    print("="*60)
    print()

    # Initialize dependencies (imported locally so importing this module
    # does not require the LLM/queue machinery).
    from tools.llm_wrapper import LLMWrapper
    from demo_phase1 import MessageQueue

    llm = LLMWrapper(model='fast')
    queue = MessageQueue()

    # Create innovator agent
    innovator = InnovatorAgent(queue, llm)

    print(f"✅ {innovator.name} initialized")
    print(f" Role: {innovator.role}")
    print()

    # Mock analysis from previous agents — stands in for real Analyst output
    # so this demo runs without the full pipeline.
    mock_analysis = {
        'title': 'Attention Is All You Need',
        'key_contributions': [
            'Introduced Transformer architecture',
            'Eliminated recurrence with self-attention',
            'Achieved SOTA on translation'
        ],
        'methodology': {
            'approach': 'Multi-head self-attention',
            'datasets': ['WMT 2014'],
            'evaluation_metrics': ['BLEU']
        },
        'main_results': {
            'summary': 'Best translation performance',
            'performance_improvements': ['28.4 BLEU on EN-DE']
        },
        'limitations': [
            'O(n²) memory complexity',
            'Requires large datasets'
        ],
        'gaps_identified': [
            'Efficiency for long sequences',
            'Applications beyond NLP'
        ]
    }

    # Create test message mimicking what the supervisor would send.
    test_message = Message(
        sender="supervisor",
        recipient="innovator",
        message_type=MessageType.REQUEST,
        content={
            'action': 'innovate',
            'analysis': mock_analysis
        }
    )

    # Process
    print("🧪 Generating innovations for Transformer paper...")
    print()

    result = innovator.process(test_message)

    print("\n" + "="*60)
    print("💡 INNOVATION RESULTS")
    print("="*60)

    # Pretty-print a subset of the structured result; a result containing
    # an 'error' key signals that generation failed.
    if 'error' not in result:
        print(f"\n🚀 Future Directions ({len(result.get('future_directions', []))}):")
        for i, direction in enumerate(result.get('future_directions', [])[:3], 1):
            print(f"\n{i}. {direction.get('direction', 'N/A')}")
            print(f" {direction.get('description', 'N/A')}")
            print(f" Feasibility: {direction.get('feasibility', 'N/A')}")
            print(f" Timeframe: {direction.get('timeframe', 'N/A')}")

        print(f"\n🏭 Industry Applications ({len(result.get('industry_applications', []))}):")
        for i, app in enumerate(result.get('industry_applications', [])[:3], 1):
            print(f"\n{i}. {app.get('domain', 'N/A')}: {app.get('application', 'N/A')}")
            print(f" Value: {app.get('value_proposition', 'N/A')}")

        print(f"\n💰 Commercial Potential: {result.get('commercial_potential', 'N/A')}")
        print(f" {result.get('commercial_reasoning', 'N/A')}")

        print(f"\n🔮 10-Year Vision:")
        print(f" {result.get('ten_year_vision', 'N/A')}")

        breakthrough = result.get('breakthrough_potential', {})
        print(f"\n⭐ Breakthrough Potential: {breakthrough.get('score', 0)}/10")
        print(f" {breakthrough.get('reasoning', 'N/A')}")
        print(f" Paradigm Shift: {breakthrough.get('paradigm_shift', False)}")
    else:
        print(f"\n❌ Error: {result['error']}")

    print("\n" + "="*60)
    print("📋 FULL OUTPUT (JSON)")
    print("="*60)
    print(json.dumps(result, indent=2))

    print("\n✅ Demo complete!")
491
+
492
# Run the interactive demo only when executed as a script.
if __name__ == "__main__":
    demo_innovator()
agents/writer_agent.py ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents/writer_agent.py
3
+ Synthesize all agent outputs into grant proposal
4
+ """
5
+
6
+ import sys
7
+ import os
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+
11
+ from demo_phase1 import BaseAgent, Message, MessageType
12
+ from typing import Dict, Any
13
+ import json
14
+ from datetime import datetime
15
+
16
class WriterAgent(BaseAgent):
    """
    Writer Agent - Grant Proposal Synthesis

    Role: Create final grant proposal document
    Personality: Eloquent, persuasive, policy-aware

    Capabilities:
    - Synthesize all agent outputs into coherent narrative
    - Write NSF/NIH-style grant proposal sections
    - Handle conflicts between agents
    - Generate executive summary
    - Create research plan and timeline
    - Write impact statement
    - Format professional proposal
    """

    def __init__(self, message_queue, llm):
        """
        Args:
            message_queue: Shared queue used by BaseAgent for routing.
            llm: LLM wrapper exposing generate(prompt, max_tokens, temperature).
        """
        super().__init__(
            name="writer",
            role="Grant Proposal Synthesis & Document Generation",
            message_queue=message_queue
        )
        self.llm = llm

    def process(self, message: Message) -> Dict[str, Any]:
        """
        Process writing request.

        Expected message content:
        {
            'action': 'write_proposal',
            'analysis': {...},      # From analyst
            'evaluation': {...},    # From evaluator
            'innovations': {...},   # From innovator
            'conflicts': [...]      # Optional: any conflicts
        }

        Returns:
        {
            'proposal': {per-section texts},
            'full_text': '...',     # Complete formatted proposal
            'word_count': int,
            'metadata': {...}
        }
        or {'error': '...'} when the action is unknown, required inputs are
        missing, or generation fails.
        """
        action = message.content.get('action')

        if action != 'write_proposal':
            return {'error': f'Unknown action: {action}'}

        analysis = message.content.get('analysis')
        evaluation = message.content.get('evaluation')
        innovations = message.content.get('innovations')
        conflicts = message.content.get('conflicts', [])

        # All three upstream agent outputs are required to synthesize.
        if not analysis or not evaluation or not innovations:
            return {'error': 'Missing required inputs (analysis, evaluation, or innovations)'}

        print(f"✍️ Writer: Synthesizing grant proposal...")

        try:
            # Generate proposal
            proposal = self._write_proposal(
                analysis=analysis,
                evaluation=evaluation,
                innovations=innovations,
                conflicts=conflicts
            )

            word_count = len(proposal.get('full_text', '').split())

            print(f"✅ Writer: Proposal complete ({word_count} words)")

            return proposal

        except Exception as e:
            print(f"❌ Writer error: {e}")
            return {'error': str(e)}

    def _write_proposal(
        self,
        analysis: Dict[str, Any],
        evaluation: Dict[str, Any],
        innovations: Dict[str, Any],
        conflicts: list
    ) -> Dict[str, Any]:
        """Generate the complete grant proposal (all sections + full text)."""

        print("🧠 Writer: Generating proposal sections...")

        # Generate each section
        sections = {}

        # 1. Executive Summary
        print(" 📝 Writing executive summary...")
        sections['executive_summary'] = self._write_executive_summary(
            analysis, evaluation, innovations
        )

        # 2. Project Description
        print(" 📝 Writing project description...")
        sections['project_description'] = self._write_project_description(
            analysis, evaluation
        )

        # 3. Research Plan
        print(" 📝 Writing research plan...")
        sections['research_plan'] = self._write_research_plan(
            analysis, innovations
        )

        # 4. Broader Impacts
        print(" 📝 Writing broader impacts...")
        sections['broader_impacts'] = self._write_broader_impacts(
            innovations
        )

        # 5. Budget Justification
        print(" 📝 Writing budget justification...")
        sections['budget_justification'] = self._write_budget_justification(
            innovations
        )

        # 6. Timeline
        print(" 📝 Creating timeline...")
        sections['timeline'] = self._create_timeline(innovations)

        # 7. References (placeholder)
        sections['references'] = self._create_references(analysis)

        # 8. Conflict resolution.
        # FIX: resolve conflicts BEFORE assembling the document. Previously
        # this ran after _assemble_proposal, so the resolution narrative was
        # stored in `sections` but never reached `full_text`, even though
        # metadata reported conflicts as resolved.
        if conflicts:
            sections['conflict_resolution'] = self._resolve_conflicts(conflicts)

        # Assemble full proposal (includes conflict resolution when present)
        full_text = self._assemble_proposal(sections, analysis)

        return {
            'proposal': sections,
            'full_text': full_text,
            'word_count': len(full_text.split()),
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'paper_title': analysis.get('title', 'Unknown'),
                'quality_score': evaluation.get('scores', {}).get('overall', 0),
                'funding_potential': evaluation.get('funding_potential', 'UNKNOWN'),
                'conflicts_resolved': len(conflicts)
            }
        }

    def _write_executive_summary(
        self,
        analysis: Dict,
        evaluation: Dict,
        innovations: Dict
    ) -> str:
        """Generate executive summary (1 page) via the LLM.

        Returns the summary text, or a bracketed error placeholder on failure
        so assembly can proceed without raising.
        """

        prompt = f"""Write a compelling 1-page executive summary for a grant proposal based on:

PAPER: {analysis.get('title', 'Unknown')}

KEY FINDINGS:
{json.dumps(analysis.get('key_contributions', []), indent=2)}

QUALITY ASSESSMENT:
Overall Score: {evaluation.get('scores', {}).get('overall', 0)}/10
Funding Potential: {evaluation.get('funding_potential', 'UNKNOWN')}

FUTURE DIRECTIONS:
{json.dumps([d.get('direction') for d in innovations.get('future_directions', [])], indent=2)}

Write an executive summary (250-300 words) that:
1. Opens with a compelling hook about the problem
2. Summarizes the key innovation
3. Highlights intellectual merit
4. Emphasizes broader impacts
5. States funding request (assume $500K over 3 years)
6. Ends with transformative potential

Use persuasive, professional grant-writing style. Make it exciting but credible."""

        try:
            summary = self.llm.generate(
                prompt=prompt,
                max_tokens=500,
                temperature=0.7
            )
            return summary.strip()
        except Exception as e:
            return f"[Executive Summary - Generation Error: {e}]"

    def _write_project_description(
        self,
        analysis: Dict,
        evaluation: Dict
    ) -> str:
        """Generate project description (2-3 pages) via the LLM."""

        prompt = f"""Write a detailed project description for a grant proposal:

PAPER ANALYSIS:
Title: {analysis.get('title')}
Contributions: {json.dumps(analysis.get('key_contributions', []))}
Methodology: {json.dumps(analysis.get('methodology', {}))}
Results: {json.dumps(analysis.get('main_results', {}))}

EVALUATION:
Strengths: {json.dumps(evaluation.get('strengths', []))}
Weaknesses: {json.dumps(evaluation.get('weaknesses', []))}

Write 3-4 paragraphs covering:
1. **Background & Motivation**: Why is this important?
2. **Current State**: What has been done (cite the paper)?
3. **Gap & Opportunity**: What's missing and why it matters
4. **Proposed Work**: What we will do to address the gap

Use clear, compelling academic writing. Be specific about technical details."""

        try:
            description = self.llm.generate(
                prompt=prompt,
                max_tokens=800,
                temperature=0.6
            )
            return description.strip()
        except Exception as e:
            return f"[Project Description - Generation Error: {e}]"

    def _write_research_plan(
        self,
        analysis: Dict,
        innovations: Dict
    ) -> str:
        """Generate research plan with specific aims (top 3 future directions)."""

        prompt = f"""Write a detailed research plan with specific aims:
CURRENT WORK:
{json.dumps(analysis.get('key_contributions', []))}

FUTURE DIRECTIONS:
{json.dumps([{
    'direction': d.get('direction'),
    'description': d.get('description'),
    'feasibility': d.get('feasibility')
} for d in innovations.get('future_directions', [])[:3]], indent=2)}

EXTENSIONS:
{json.dumps([e.get('extension') for e in innovations.get('extensions', [])], indent=2)}

Structure:

**Aim 1: [First Direction]**
- Rationale (why important)
- Approach (how we'll do it)
- Expected outcomes
- Potential challenges and mitigation

**Aim 2: [Second Direction]**
- (same structure)

**Aim 3: [Third Direction]**
- (same structure)

Write 2-3 paragraphs per aim. Be specific and technical."""

        try:
            plan = self.llm.generate(
                prompt=prompt,
                max_tokens=1200,
                temperature=0.6
            )
            return plan.strip()
        except Exception as e:
            return f"[Research Plan - Generation Error: {e}]"

    def _write_broader_impacts(self, innovations: Dict) -> str:
        """Generate broader impacts statement via the LLM."""

        prompt = f"""Write a compelling broader impacts statement:

APPLICATIONS:
{json.dumps([{
    'domain': a.get('domain'),
    'application': a.get('application'),
    'value': a.get('value_proposition')
} for a in innovations.get('industry_applications', [])], indent=2)}

COMMERCIAL POTENTIAL: {innovations.get('commercial_potential')}

VISION:
{innovations.get('ten_year_vision', '')}

Write 2-3 paragraphs covering:
1. **Societal Impact**: How will this benefit society?
2. **Educational Impact**: Training, outreach, diversity
3. **Economic Impact**: Jobs, innovation, competitiveness
4. **Global Impact**: International collaboration, sustainability

Be aspirational but realistic. Show transformative potential."""

        try:
            impacts = self.llm.generate(
                prompt=prompt,
                max_tokens=600,
                temperature=0.7
            )
            return impacts.strip()
        except Exception as e:
            return f"[Broader Impacts - Generation Error: {e}]"

    def _write_budget_justification(self, innovations: Dict) -> str:
        """Return the (static) budget justification.

        Note: `innovations` is currently unused — the budget is a fixed
        template, not LLM-generated.
        """

        # Simple template budget
        budget_template = """
BUDGET JUSTIFICATION (3-Year Project, $500,000 Total)

**Year 1: $180,000**
- Personnel: $120,000 (PI 1 month summer, 1 Postdoc, 1 PhD student)
- Equipment: $30,000 (GPU cluster, software licenses)
- Travel: $15,000 (Conference presentations, collaborations)
- Other: $15,000 (Cloud computing, datasets, publication fees)

**Year 2: $160,000**
- Personnel: $125,000 (Same team, cost-of-living adjustment)
- Equipment: $10,000 (Additional computing resources)
- Travel: $15,000 (Conferences, workshops)
- Other: $10,000 (Materials, services)

**Year 3: $160,000**
- Personnel: $130,000 (Same team structure)
- Travel: $20,000 (Final dissemination, collaborations)
- Other: $10,000 (Publication, open-source release)

**Justification:**
This budget supports a lean, focused team to achieve the proposed aims. The postdoc
will lead implementation, the PhD student will conduct experiments, and the PI will
provide strategic direction. Equipment costs are essential for computational research.
Travel enables dissemination and collaboration with key partners."""

        return budget_template.strip()

    def _create_timeline(self, innovations: Dict) -> Dict[str, list]:
        """Create the (static) 3-year project timeline.

        Note: `innovations` is currently unused — the timeline is a fixed
        quarter-by-quarter template.
        """

        timeline = {
            'Year 1': [
                'Q1: Literature review and baseline implementation',
                'Q2: Aim 1 - Initial experiments and data collection',
                'Q3: Aim 1 - Analysis and refinement',
                'Q4: Aim 2 - Begin second direction'
            ],
            'Year 2': [
                'Q1: Aim 2 - Core development',
                'Q2: Aim 2 - Testing and validation',
                'Q3: Aim 3 - Begin third direction',
                'Q4: Integration and cross-validation'
            ],
            'Year 3': [
                'Q1: Comprehensive evaluation',
                'Q2: Real-world deployment and testing',
                'Q3: Paper writing and submission',
                'Q4: Open-source release and dissemination'
            ]
        }

        return timeline

    def _create_references(self, analysis: Dict) -> str:
        """Create references section (placeholder built from the analysis)."""

        # In real implementation, would extract from PDF
        refs = f"""
REFERENCES

[1] {', '.join(analysis.get('authors', ['Unknown']))}. "{analysis.get('title', 'Unknown')}".
    {analysis.get('venue', 'Conference/Journal')}, {analysis.get('year', 'Year')}.

[2-10] Additional references would be extracted from the paper and added here...
"""
        return refs.strip()

    def _resolve_conflicts(self, conflicts: list) -> str:
        """Generate a short narrative explaining how agent disagreements were resolved."""

        if not conflicts:
            return "No conflicts to resolve."

        prompt = f"""These agents disagreed during analysis:

{json.dumps(conflicts, indent=2)}

Write a brief paragraph explaining:
1. What the disagreement was
2. How we resolved it (weighted expert opinions, additional analysis, etc.)
3. Why the final decision is sound

Be diplomatic and show that diverse perspectives strengthen the proposal."""

        try:
            resolution = self.llm.generate(
                prompt=prompt,
                max_tokens=300,
                temperature=0.6
            )
            return resolution.strip()
        except Exception as e:
            return f"[Conflict resolution failed: {e}]"

    def _assemble_proposal(self, sections: Dict, analysis: Dict) -> str:
        """Assemble all sections into one formatted plain-text proposal."""

        proposal = f"""
{'='*70}
GRANT PROPOSAL
{'='*70}

Title: Extension and Application of "{analysis.get('title', 'Unknown')}"
Principal Investigator: [PI Name]
Institution: [Institution]
Duration: 3 years
Requested Amount: $500,000

{'='*70}

EXECUTIVE SUMMARY
{'-'*70}
{sections.get('executive_summary', '[Missing]')}

{'='*70}

PROJECT DESCRIPTION
{'-'*70}
{sections.get('project_description', '[Missing]')}

{'='*70}

RESEARCH PLAN
{'-'*70}
{sections.get('research_plan', '[Missing]')}

{'='*70}

BROADER IMPACTS
{'-'*70}
{sections.get('broader_impacts', '[Missing]')}

{'='*70}

BUDGET JUSTIFICATION
{'-'*70}
{sections.get('budget_justification', '[Missing]')}

{'='*70}

PROJECT TIMELINE
{'-'*70}
"""

        # Add timeline
        timeline = sections.get('timeline', {})
        for year, quarters in timeline.items():
            proposal += f"\n{year}:\n"
            for quarter in quarters:
                proposal += f" • {quarter}\n"

        proposal += f"\n{'='*70}\n"
        proposal += f"\nREFERENCES\n{'-'*70}\n"
        proposal += sections.get('references', '[Missing]')

        # FIX: render the conflict-resolution narrative when present;
        # previously it was stored in `sections` but never emitted.
        if sections.get('conflict_resolution'):
            proposal += f"\n\n{'='*70}\n"
            proposal += f"\nCONFLICT RESOLUTION\n{'-'*70}\n"
            proposal += sections['conflict_resolution']

        proposal += f"\n\n{'='*70}\n"
        proposal += f"END OF PROPOSAL\n"
        proposal += f"{'='*70}\n"

        return proposal
502
+
503
+
504
+
505
+ # ==================== DEMO ====================
506
+
507
def demo_writer():
    """Demo the Writer Agent.

    Feeds hard-coded mock outputs from the analyst/evaluator/innovator agents
    into WriterAgent and prints the generated proposal. Interactive: offers to
    save the full proposal to disk. Requires a valid GROQ_API_KEY.
    """

    print("="*60)
    print("✍️ WRITER AGENT DEMO")
    print("="*60)
    print()

    # Initialize dependencies (imported locally so importing this module
    # does not require the LLM/queue machinery).
    from tools.llm_wrapper import LLMWrapper
    from demo_phase1 import MessageQueue

    llm = LLMWrapper(model='fast')
    queue = MessageQueue()

    # Create writer agent
    writer = WriterAgent(queue, llm)

    print(f"✅ {writer.name} initialized")
    print(f" Role: {writer.role}")
    print()

    # Mock inputs from other agents — stands in for the full pipeline.
    mock_data = {
        'analysis': {
            'title': 'Attention Is All You Need',
            'authors': ['Vaswani et al.'],
            'year': 2017,
            'venue': 'NeurIPS',
            'key_contributions': [
                'Introduced Transformer architecture',
                'Eliminated recurrence',
                'Achieved SOTA translation'
            ],
            'methodology': {
                'approach': 'Self-attention mechanism',
                'datasets': ['WMT 2014']
            },
            'main_results': {
                'summary': 'Best translation performance',
                'performance_improvements': ['28.4 BLEU']
            }
        },
        'evaluation': {
            'scores': {'overall': 9},
            'funding_potential': 'HIGH',
            'strengths': ['Novel architecture', 'Strong results'],
            'weaknesses': ['Memory complexity']
        },
        'innovations': {
            'future_directions': [
                {
                    'direction': 'Efficient attention mechanisms',
                    'description': 'Reduce O(n²) complexity',
                    'feasibility': 'HIGH'
                }
            ],
            'industry_applications': [
                {
                    'domain': 'Healthcare',
                    'application': 'Medical text analysis',
                    'value_proposition': 'Faster diagnosis'
                }
            ],
            'extensions': [{'extension': 'Sparse attention'}],
            'commercial_potential': 'HIGH',
            'ten_year_vision': 'Ubiquitous AI translation'
        }
    }

    # Create test message mimicking what the supervisor would send.
    test_message = Message(
        sender="supervisor",
        recipient="writer",
        message_type=MessageType.REQUEST,
        content={
            'action': 'write_proposal',
            **mock_data
        }
    )

    # Process
    print("🧪 Generating grant proposal...")
    print()

    result = writer.process(test_message)

    print("\n" + "="*60)
    print("✍️ PROPOSAL GENERATED")
    print("="*60)

    if 'error' not in result:
        print(f"\n📊 Metadata:")
        metadata = result.get('metadata', {})
        for key, value in metadata.items():
            print(f" {key}: {value}")

        print(f"\n📄 Proposal Preview (first 1000 chars):")
        print("-" * 60)
        full_text = result.get('full_text', '')
        print(full_text[:1000])
        print("...\n[truncated]\n")

        # Option to save
        save = input("Save full proposal to file? (y/n): ").strip().lower()
        if save == 'y':
            filename = "grant_proposal.txt"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(full_text)
            # FIX: the message previously printed the literal "(unknown)"
            # (an f-string with no placeholder) instead of the saved path.
            print(f"✅ Saved to {filename}")
    else:
        print(f"\n❌ Error: {result['error']}")

    print("\n✅ Demo complete!")
621
+
622
+
623
# Run the interactive demo only when executed as a script.
if __name__ == "__main__":
    demo_writer()
tools/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tools package
3
+ Utility tools for the multi-agent system
4
+ """
5
+
6
+ from .llm_wrapper import LLMWrapper, create_llm
7
+ from .pdf_reader import PDFReader
8
+
9
+ __all__ = ['LLMWrapper', 'create_llm', 'PDFReader']
tools/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (367 Bytes). View file
 
tools/__pycache__/llm_wrapper.cpython-310.pyc ADDED
Binary file (8.33 kB). View file
 
tools/__pycache__/pdf_reader.cpython-310.pyc ADDED
Binary file (11.4 kB). View file
 
tools/llm_wrapper.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tools/llm_wrapper.py
3
+ Unified LLM interface using Groq for fast inference
4
+ """
5
+
6
+
7
+ from groq import Groq
8
+ import json
9
+ import os
10
+ from typing import Dict, Any, Optional, List
11
+ from dotenv import load_dotenv
12
+ import time
13
+ import re
14
+
15
+
16
class LLMWrapper:
    """
    Unified interface for LLM operations using Groq.

    Wraps the Groq chat-completions API behind simple text, JSON and
    batch generation helpers, and keeps cumulative usage statistics
    (calls, tokens, errors) for the lifetime of the instance.
    """
    # Available Groq models (in order of preference).
    # NOTE(review): model IDs are hard-coded; Groq retires models over
    # time — confirm these are still served before deploying.
    MODELS = {
        'best': 'llama-3.1-70b-versatile',  # Highest quality
        'fast': 'llama-3.1-8b-instant',  # Fastest
        'reasoning': 'mixtral-8x7b-32768',  # Good for analysis
        'efficient': 'gemma-7b-it'  # Most efficient
    }

    def __init__(self, api_key: Optional[str] = None, model: str = 'best'):
        """
        Initialize Groq client.

        Args:
            api_key: Groq API key. If omitted, read from the GROQ_API_KEY
                environment variable (a .env file is loaded first via
                python-dotenv).
            model: Model preference key ('best', 'fast', 'reasoning',
                'efficient'). Unknown keys silently fall back to 'best'.

        Raises:
            ValueError: If no API key can be found.
        """
        # Load environment variables from a .env file, if present
        load_dotenv()

        # Get API key (explicit argument wins over the environment)
        self.api_key = api_key or os.getenv("GROQ_API_KEY")

        if not self.api_key:
            raise ValueError(
                "GROQ_API_KEY not found! "
                "Set it in .env file or pass as parameter. "
                "Get free key: https://console.groq.com/keys"
            )

        # Initialize client
        self.client = Groq(api_key=self.api_key)

        # Resolve the preference key to a concrete model id
        self.model = self.MODELS.get(model, self.MODELS['best'])

        # Stats tracking (cumulative over the instance lifetime)
        self.total_tokens = 0   # total tokens reported by the API
        self.total_calls = 0    # number of successful API calls
        self.total_errors = 0   # number of calls that raised

        print(f"✅ Groq LLM initialized with model: {self.model}")

    def generate(
        self,
        prompt: str,
        max_tokens: int = 1000,
        temperature: float = 0.7,  # default makes the output mildly creative
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a text response.

        Args:
            prompt: User prompt.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature (0.0-1.0); higher is more
                creative, lower more deterministic.
            system_prompt: Optional system instruction prepended to the
                conversation.

        Returns:
            Generated text (content of the first choice).

        Raises:
            Exception: Re-raises any Groq client error after counting it
                in ``total_errors``.
        """
        try:
            # Build messages in OpenAI-style chat format
            messages = []

            if system_prompt:
                messages.append({
                    "role": "system",
                    "content": system_prompt
                })

            messages.append({
                "role": "user",
                "content": prompt
            })

            # Call Groq API (timed only for the log line below)
            start_time = time.time()

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.95
            )

            elapsed = time.time() - start_time

            # Extract response text from the first choice
            result = response.choices[0].message.content

            # Update stats (only successful calls reach this point)
            self.total_tokens += response.usage.total_tokens
            self.total_calls += 1

            print(f"✅ LLM call completed in {elapsed:.2f}s ({response.usage.total_tokens} tokens)")

            return result

        except Exception as e:
            self.total_errors += 1
            print(f"❌ LLM error: {e}")
            raise

    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        max_tokens: int = 2000,
        temperature: float = 0.3
    ) -> Dict[str, Any]:
        """
        Generate a JSON response matching a schema.

        The schema is embedded in the prompt as guidance only — it is NOT
        enforced server-side, so the parsed result may still deviate from
        the requested shape.

        Args:
            prompt: User prompt.
            schema: Example-shaped dict shown to the model as the expected
                JSON schema.
            max_tokens: Maximum tokens.
            temperature: Lower for more deterministic JSON.

        Returns:
            Parsed JSON object.

        Raises:
            ValueError: If no parseable JSON can be extracted from the
                model's response.
        """
        # Add JSON instruction to prompt
        schema_str = json.dumps(schema, indent=2)

        full_prompt = f"""{prompt}

IMPORTANT: Respond with ONLY valid JSON matching this schema:
{schema_str}

Do not include any explanation or markdown formatting.

Return pure JSON that can be parsed directly."""

        system_prompt = "You are a precise JSON generator. Always return valid JSON with no additional text."

        # Generate response
        response_text = self.generate(
            prompt=full_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            system_prompt=system_prompt
        )

        # Parse JSON — try progressively more forgiving strategies
        try:
            # Try direct parse
            return json.loads(response_text)
        except json.JSONDecodeError:
            # Try to extract JSON from markdown code blocks
            json_match = re.search(r'```json\s*\n(.*?)\n```', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(1))

            # Try to find a JSON object anywhere in the text
            # (greedy: spans first '{' to last '}')
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))

            # If all fails, raise error
            raise ValueError(f"Could not parse JSON from response: {response_text[:200]}")

    def generate_with_retry(
        self,
        prompt: str,
        max_retries: int = 3,
        **kwargs
    ) -> str:
        """
        Generate with automatic retry on failure.

        Args:
            prompt: User prompt.
            max_retries: Maximum retry attempts.
            **kwargs: Additional arguments forwarded to generate().

        Returns:
            Generated text.

        Raises:
            Exception: The last error encountered if every attempt fails.
        """
        last_error = None

        for attempt in range(max_retries):
            try:
                return self.generate(prompt, **kwargs)
            except Exception as e:
                last_error = e
                print(f"⚠️ Attempt {attempt + 1} failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s, ...

        raise last_error

    def batch_generate(
        self,
        prompts: List[str],
        max_tokens: int = 1000,
        temperature: float = 0.7
    ) -> List[Optional[str]]:
        """
        Generate responses for multiple prompts sequentially.

        Args:
            prompts: List of prompts.
            max_tokens: Max tokens per response.
            temperature: Sampling temperature.

        Returns:
            List of responses, aligned with ``prompts``. A failed prompt
            yields ``None`` at its position rather than aborting the batch.
        """
        results = []

        for i, prompt in enumerate(prompts):
            print(f"Processing prompt {i + 1}/{len(prompts)}...")

            try:
                response = self.generate(
                    prompt=prompt,
                    max_tokens=max_tokens,
                    temperature=temperature
                )
                results.append(response)
            except Exception as e:
                # Best-effort: record the failure and keep going
                print(f"❌ Prompt {i + 1} failed: {e}")
                results.append(None)

            # Rate limiting (30 req/min = 2 sec between calls)
            if i < len(prompts) - 1:
                time.sleep(2)

        return results

    def count_tokens(self, text: str) -> int:
        """
        Estimate token count (rough approximation, not a real tokenizer).

        Args:
            text: Input text.

        Returns:
            Estimated token count.
        """
        # Rough estimate: 1 token ≈ 4 characters
        return len(text) // 4

    def get_stats(self) -> Dict[str, Any]:
        """Get cumulative usage statistics for this wrapper instance."""
        return {
            'total_calls': self.total_calls,
            'total_tokens': self.total_tokens,
            'total_errors': self.total_errors,
            'model': self.model,
            # max(..., 1) guards the division when no calls have been made
            'avg_tokens_per_call': self.total_tokens / max(self.total_calls, 1)
        }

    def reset_stats(self) -> None:
        """Reset usage statistics to zero (model/client are untouched)."""
        self.total_tokens = 0
        self.total_calls = 0
        self.total_errors = 0
287
+
288
+ # ==================== HELPER FUNCTIONS ====================
289
+
290
def create_llm(model: str = 'best', api_key: Optional[str] = None) -> LLMWrapper:
    """Factory helper for building an LLMWrapper.

    Thin convenience wrapper around the LLMWrapper constructor. Note the
    argument order here is (model, api_key) while the constructor takes
    (api_key, model), so arguments are forwarded by keyword.

    Args:
        model: Model preference key ('best', 'fast', 'reasoning', 'efficient').
        api_key: Optional Groq API key; when omitted the wrapper falls back
            to the GROQ_API_KEY environment variable.

    Returns:
        A ready-to-use LLMWrapper instance.
    """
    return LLMWrapper(model=model, api_key=api_key)
302
+
303
+
304
+ # ==================== DEMO & TESTING ====================
305
+
306
def demo_llm():
    """Interactive walkthrough of the LLMWrapper features.

    Performs three live Groq API calls (plain generation, schema-guided
    JSON, and a small batch) and finishes by printing the accumulated
    usage statistics. Purely a manual smoke test.
    """
    banner = "=" * 60
    divider = "-" * 60

    print(banner)
    print("🤖 GROQ LLM WRAPPER DEMO")
    print(banner)
    print()

    # The 'fast' model keeps the demo cheap and snappy
    wrapper = LLMWrapper(model='fast')

    # --- Test 1: plain text generation ---------------------------------
    print("\n📝 Test 1: Simple Text Generation")
    print(divider)
    reply = wrapper.generate(
        prompt="Explain what a research paper abstract is in one sentence.",
        max_tokens=100,
        temperature=0.7
    )
    print(f"Response: {reply}")

    # --- Test 2: schema-guided JSON output -----------------------------
    print("\n📊 Test 2: Structured JSON Output")
    print(divider)

    expected_shape = {
        "title": "string",
        "summary": "string",
        "key_points": ["string", "string", "string"]
    }

    structured = wrapper.generate_structured(
        prompt="Summarize what makes a good research paper in 3 key points.",
        schema=expected_shape,
        temperature=0.3
    )
    print(json.dumps(structured, indent=2))

    # --- Test 3: batch processing --------------------------------------
    print("\n🔄 Test 3: Batch Processing")
    print(divider)

    questions = [
        "Name one benefit of peer review.",
        "Name one challenge in academic publishing.",
        "Name one trend in AI research."
    ]

    answers = wrapper.batch_generate(questions, max_tokens=50)
    for idx, answer in enumerate(answers, 1):
        print(f"{idx}. {answer}")

    # --- Usage statistics ----------------------------------------------
    print("\n📈 Usage Statistics")
    print(divider)
    print(json.dumps(wrapper.get_stats(), indent=2))

    print("\n✅ Demo complete!")
366
+
367
+
368
# Run the interactive demo only when executed directly as a script.
if __name__ == "__main__":
    demo_llm()
tools/pdf_reader.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tools/pdf_reader.py
3
+ Extract text and metadata from research papers (PDF)
4
+ """
5
+
6
+ from pypdf import PdfReader
7
+ from typing import Dict, Any, Optional, List
8
+ import re
9
+ import os
10
+
11
class PDFReader:
    """
    PDF extraction tool for research papers.

    Features:
    - Extract full text
    - Extract metadata (title, author, etc.)
    - Identify abstract
    - Extract sections
    - Handle multi-column layouts

    All heavy lifting is delegated to pypdf's PdfReader; the methods here
    layer regex heuristics on top of its extracted text.
    """

    def __init__(self):
        # Currently informational only: extension checks are done inline
        # in extract_text()/validate_pdf().
        self.supported_extensions = ['.pdf']
        print("✅ PDF Reader initialized")

    def extract_text(self, pdf_path: str, max_pages: Optional[int] = None) -> str:
        """
        Extract all text from a PDF.

        Args:
            pdf_path: Path to PDF file.
            max_pages: Maximum pages to extract (None = all).

        Returns:
            Extracted text as one string, pages joined by blank lines.

        Raises:
            FileNotFoundError: If pdf_path does not exist.
            ValueError: If the path does not end in '.pdf'.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF not found: {pdf_path}")

        if not pdf_path.lower().endswith('.pdf'):
            raise ValueError(f"Not a PDF file: {pdf_path}")

        print(f"📖 Reading PDF: {pdf_path}")

        try:
            reader = PdfReader(pdf_path)
            num_pages = len(reader.pages)

            print(f" Pages: {num_pages}")

            # Extract text from pages
            text_parts = []
            pages_to_read = min(num_pages, max_pages) if max_pages else num_pages

            for i in range(pages_to_read):
                page = reader.pages[i]
                page_text = page.extract_text()
                text_parts.append(page_text)

                # Progress heartbeat every 10 pages
                if (i + 1) % 10 == 0:
                    print(f" Processed {i + 1}/{pages_to_read} pages...")

            full_text = '\n\n'.join(text_parts)

            print(f"✅ Extracted {len(full_text)} characters from {pages_to_read} pages")

            return full_text

        except Exception as e:
            print(f"❌ PDF extraction error: {e}")
            raise

    def get_paper_info(self, pdf_path: str) -> Dict[str, Any]:
        """
        Extract metadata and basic info from a PDF.

        Never raises: on failure returns a dict with empty fields plus an
        'error' key describing the problem.

        Returns:
            {
                'metadata': {...},     # title/author/etc. from PDF metadata
                'num_pages': int,
                'abstract': str,       # '' when not detected
                'sections': [...],     # heuristically detected headings
                'file_path': str,      # (absent in the error case)
                'file_size': int       # bytes (absent in the error case)
            }
        """
        print(f"📊 Extracting paper info from: {pdf_path}")

        try:
            reader = PdfReader(pdf_path)

            # Get metadata (pypdf exposes it as /Key entries; may be None)
            metadata = {}
            if reader.metadata:
                metadata = {
                    'title': reader.metadata.get('/Title', ''),
                    'author': reader.metadata.get('/Author', ''),
                    'subject': reader.metadata.get('/Subject', ''),
                    'creator': reader.metadata.get('/Creator', ''),
                    'producer': reader.metadata.get('/Producer', ''),
                    'creation_date': str(reader.metadata.get('/CreationDate', '')),
                }

            # Get number of pages
            num_pages = len(reader.pages)

            # Extract first few pages for abstract detection
            first_pages_text = ''
            for i in range(min(3, num_pages)):  # Check first 3 pages
                first_pages_text += reader.pages[i].extract_text() + '\n\n'

            # Try to extract abstract
            abstract = self._extract_abstract(first_pages_text)

            # Try to identify sections (only scans the same first pages,
            # so later headings will be missed)
            sections = self._extract_sections(first_pages_text)

            info = {
                'metadata': metadata,
                'num_pages': num_pages,
                'abstract': abstract,
                'sections': sections,
                'file_path': pdf_path,
                'file_size': os.path.getsize(pdf_path)
            }

            print(f"✅ Paper info extracted:")
            print(f" Title: {metadata.get('title', 'Not found')[:50]}...")
            print(f" Pages: {num_pages}")
            print(f" Abstract: {'Found' if abstract else 'Not found'}")

            return info

        except Exception as e:
            print(f"❌ Error extracting paper info: {e}")
            return {
                'metadata': {},
                'num_pages': 0,
                'abstract': '',
                'sections': [],
                'error': str(e)
            }

    def _extract_abstract(self, text: str) -> str:
        """Try to extract the abstract from paper text.

        Returns '' when no match of substantial length (> 50 chars) is
        found. The result is whitespace-normalized and capped at 1000
        characters.
        """
        # Look for "Abstract" section
        # Common patterns:
        # - "Abstract\n"
        # - "ABSTRACT\n"
        # - "Abstract—"
        # - "Abstract:"

        patterns = [
            r'(?i)abstract[:\-—]\s*(.*?)(?=\n\s*\n|\n\s*1\.|\n\s*introduction|$)',
            r'(?i)abstract\s*\n\s*(.*?)(?=\n\s*\n|\n\s*1\.|\n\s*introduction|$)',
        ]

        for pattern in patterns:
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if match:
                abstract = match.group(1).strip()

                # Clean up abstract
                abstract = re.sub(r'\s+', ' ', abstract)  # Remove extra whitespace
                abstract = abstract[:1000]  # Limit length

                if len(abstract) > 50:  # Must be substantial
                    return abstract

        return ''

    def _extract_sections(self, text: str) -> List[str]:
        """Try to identify paper section headings.

        Matches a fixed vocabulary of common section names (optionally
        numbered) at the start of a line; returns title-cased, de-duplicated
        names in order of first appearance.
        """
        # Common section patterns
        section_patterns = [
            r'(?i)^\s*\d+\.?\s+(introduction|background|related work|methodology|method|approach|experiments?|results?|evaluation|discussion|conclusion|references?)',
            r'(?i)^\s*(introduction|background|related work|methodology|method|approach|experiments?|results?|evaluation|discussion|conclusion)\s*\n'
        ]

        sections = []

        for pattern in section_patterns:
            matches = re.finditer(pattern, text, re.MULTILINE)
            for match in matches:
                section_name = match.group(1).strip()
                # Case-insensitive de-duplication
                if section_name.lower() not in [s.lower() for s in sections]:
                    sections.append(section_name.title())

        return sections

    def extract_page_range(
        self,
        pdf_path: str,
        start_page: int,
        end_page: int
    ) -> str:
        """Extract text from a specific 0-indexed, inclusive page range.

        Out-of-bounds indices are clamped to the document. Returns '' on
        any read error instead of raising.
        """
        try:
            reader = PdfReader(pdf_path)
            num_pages = len(reader.pages)

            # Validate range (clamp into [0, num_pages - 1], keep start <= end)
            start_page = max(0, min(start_page, num_pages - 1))
            end_page = max(start_page, min(end_page, num_pages - 1))

            text_parts = []
            for i in range(start_page, end_page + 1):
                text_parts.append(reader.pages[i].extract_text())

            return '\n\n'.join(text_parts)

        except Exception as e:
            print(f"❌ Error extracting page range: {e}")
            return ''

    def search_text(self, pdf_path: str, search_term: str) -> List[Dict[str, Any]]:
        """
        Case-insensitive literal search for text in a PDF.

        Returns a list of matches, each with the 1-based page number, a
        ~100-char context window, and the character offset within that
        page's extracted text. Returns [] on any read error.
        """
        print(f"🔍 Searching for '{search_term}' in {pdf_path}")

        try:
            reader = PdfReader(pdf_path)
            matches = []

            for page_num, page in enumerate(reader.pages):
                text = page.extract_text()

                # Find all occurrences (re.escape -> literal match, no regex metachars)
                pattern = re.compile(re.escape(search_term), re.IGNORECASE)

                for match in pattern.finditer(text):
                    # 50 chars of context on each side, clamped to the page text
                    start = max(0, match.start() - 50)
                    end = min(len(text), match.end() + 50)
                    context = text[start:end]

                    matches.append({
                        'page': page_num + 1,
                        'context': context,
                        'position': match.start()
                    })

            print(f"✅ Found {len(matches)} matches")
            return matches

        except Exception as e:
            print(f"❌ Search error: {e}")
            return []

    def extract_references(self, pdf_path: str) -> List[str]:
        """Try to extract the references/bibliography section.

        Scans the last 5 pages for a 'References'/'Bibliography' heading,
        then splits entries on numbered-line boundaries. Returns at most
        the first 50 references, or [] when nothing is found or on error.
        """
        print(f"📚 Extracting references from {pdf_path}")

        try:
            reader = PdfReader(pdf_path)
            num_pages = len(reader.pages)

            # References usually in last few pages
            last_pages_text = ''
            start_page = max(0, num_pages - 5)

            for i in range(start_page, num_pages):
                last_pages_text += reader.pages[i].extract_text() + '\n\n'

            # Look for references section (stop at an appendix or end of text)
            ref_pattern = r'(?i)(references?|bibliography)\s*\n\s*(.*?)(?=\n\s*appendix|\Z)'
            match = re.search(ref_pattern, last_pages_text, re.DOTALL)

            if match:
                ref_text = match.group(2)

                # Split into individual references
                # Common patterns: [1], (1), 1., numbered lines
                ref_lines = ref_text.split('\n')
                references = []
                current_ref = ''

                for line in ref_lines:
                    line = line.strip()

                    # Check if new reference (starts with number)
                    if re.match(r'^\[?\d+\]?\.?\s+', line):
                        if current_ref:
                            references.append(current_ref.strip())
                        current_ref = line
                    else:
                        # Continuation of the previous (wrapped) reference
                        current_ref += ' ' + line

                # Flush the final accumulated reference
                if current_ref:
                    references.append(current_ref.strip())

                print(f"✅ Extracted {len(references)} references")
                return references[:50]  # Limit to first 50

            return []

        except Exception as e:
            print(f"❌ Error extracting references: {e}")
            return []

    def get_text_stats(self, pdf_path: str) -> Dict[str, Any]:
        """Get statistics about the PDF text.

        NOTE: re-extracts the entire document, so this is as expensive as
        a full extract_text() call. Returns {'error': ...} on failure.
        """
        try:
            text = self.extract_text(pdf_path)

            stats = {
                'total_characters': len(text),
                'total_words': len(text.split()),
                'total_lines': len(text.split('\n')),
                'estimated_tokens': len(text) // 4,  # Rough estimate: 1 token ≈ 4 chars
                # max(..., 1) guards the division for empty documents
                'avg_word_length': sum(len(word) for word in text.split()) / max(len(text.split()), 1)
            }

            return stats

        except Exception as e:
            return {'error': str(e)}

    def validate_pdf(self, pdf_path: str) -> Dict[str, Any]:
        """Validate that a PDF is readable and report basic info.

        Returns a dict of boolean checks plus an 'errors' list; 'valid' is
        True only when the file exists, is a .pdf, opens cleanly, and page
        one yields more than 50 characters of extractable text (a heuristic
        to catch scanned-image PDFs).
        """
        validation = {
            'valid': False,
            'exists': False,
            'is_pdf': False,
            'readable': False,
            'num_pages': 0,
            'has_text': False,
            'errors': []
        }

        # Check existence
        if not os.path.exists(pdf_path):
            validation['errors'].append('File does not exist')
            return validation

        validation['exists'] = True

        # Check extension
        if not pdf_path.lower().endswith('.pdf'):
            validation['errors'].append('Not a PDF file')
            return validation

        validation['is_pdf'] = True

        # Try to read
        try:
            reader = PdfReader(pdf_path)
            validation['readable'] = True
            validation['num_pages'] = len(reader.pages)

            # Check if has extractable text (sample only the first page)
            if validation['num_pages'] > 0:
                sample_text = reader.pages[0].extract_text()
                if len(sample_text.strip()) > 50:
                    validation['has_text'] = True
                    validation['valid'] = True
                else:
                    validation['errors'].append('PDF has no extractable text (may be scanned image)')
            else:
                validation['errors'].append('PDF has no pages')

        except Exception as e:
            validation['errors'].append(f'Read error: {str(e)}')

        return validation
378
+
379
+
380
+
381
+ # ==================== HELPER FUNCTIONS ====================
382
+
383
def clean_text(text: str) -> str:
    """Clean extracted PDF text.

    Collapses runs of horizontal whitespace, strips standalone page
    numbers, and drops very short lines that are likely running
    headers/footers.

    Args:
        text: Raw text as returned by PDF extraction.

    Returns:
        Cleaned text, one line per retained source line.
    """
    # Collapse runs of spaces/tabs only — NOT newlines. The previous
    # version collapsed all whitespace (r'\s+'), which destroyed the line
    # structure and made both newline-based cleanup steps below dead code.
    text = re.sub(r'[ \t]+', ' ', text)

    # Remove page numbers (lines containing only digits)
    text = re.sub(r'\n\s*\d+\s*\n', '\n', text)

    # Remove headers/footers (heuristic: very short lines are likely
    # running headers, footers, or stray fragments)
    cleaned_lines = [line for line in text.split('\n') if len(line.strip()) > 20]

    return '\n'.join(cleaned_lines)
402
+
403
+
404
def extract_tables(text: str) -> List[str]:
    """Try to identify table-like structures in text.

    A line "looks tabular" when it contains 3+ tabs or 3+ runs of 3+
    consecutive spaces; 3 or more consecutive tabular lines are treated
    as one table.

    Args:
        text: Extracted (plain) text to scan.

    Returns:
        List of candidate tables, each as the original lines joined
        with newlines.
    """
    tables = []
    table_lines = []

    for line in text.split('\n'):
        # Heuristic: if line has 3+ tabs or multiple sequences of spaces
        if line.count('\t') >= 3 or len(re.findall(r'\s{3,}', line)) >= 3:
            table_lines.append(line)
        elif table_lines:
            # End of a table run: keep it only if it is substantial
            if len(table_lines) >= 3:
                tables.append('\n'.join(table_lines))
            table_lines = []

    # Flush a table that runs to the very end of the text — the previous
    # version only flushed on a following non-tabular line, silently
    # dropping trailing tables.
    if len(table_lines) >= 3:
        tables.append('\n'.join(table_lines))

    return tables
424
+
425
+ # ==================== DEMO ====================
426
+
427
def demo_pdf_reader():
    """Interactive demo of PDFReader.

    Prompts on stdin for a PDF path, validates it, prints metadata /
    abstract / sections, extracts sample text and statistics, and
    optionally runs a text search. Purely a manual smoke test — the
    exact prompt/print ordering is the observable behavior, so keep the
    statement order intact.
    """
    print("="*60)
    print("📄 PDF READER DEMO")
    print("="*60)
    print()

    reader = PDFReader()

    # Ask for PDF path
    print("Enter path to a PDF research paper to test:")
    pdf_path = input("Path: ").strip()

    if not pdf_path:
        print("⏭️ No path provided, exiting demo")
        return

    print()

    # Validate PDF up front so later steps can assume a readable file
    print("🔍 Validating PDF...")
    validation = reader.validate_pdf(pdf_path)
    print(f"Valid: {validation['valid']}")

    if not validation['valid']:
        print(f"❌ Errors: {validation['errors']}")
        return

    print()

    # Get paper info (metadata, abstract, sections)
    print("📊 Extracting paper info...")
    info = reader.get_paper_info(pdf_path)

    print(f"\nMetadata:")
    for key, value in info['metadata'].items():
        if value:  # skip empty metadata fields
            print(f" {key}: {value}")

    print(f"\nPages: {info['num_pages']}")
    print(f"File size: {info['file_size']:,} bytes")

    if info['abstract']:
        print(f"\nAbstract (first 200 chars):")
        print(f" {info['abstract'][:200]}...")

    if info['sections']:
        print(f"\nSections found: {', '.join(info['sections'])}")

    print()

    # Extract text (capped at 5 pages to keep the demo quick)
    print("📖 Extracting full text (first 5 pages)...")
    text = reader.extract_text(pdf_path, max_pages=5)

    print(f"\nExtracted text (first 500 chars):")
    print(f" {text[:500]}...")

    # Get stats (note: re-extracts the whole document internally)
    print("\n📈 Text statistics:")
    stats = reader.get_text_stats(pdf_path)
    for key, value in stats.items():
        print(f" {key}: {value}")

    # Search test (optional; empty input skips it)
    print("\n🔍 Search test:")
    search_term = input("Enter term to search (or Enter to skip): ").strip()

    if search_term:
        matches = reader.search_text(pdf_path, search_term)
        print(f"\nFound {len(matches)} matches:")
        for i, match in enumerate(matches[:3], 1):  # show at most 3 matches
            print(f"\n {i}. Page {match['page']}:")
            print(f" ...{match['context']}...")

    print("\n✅ Demo complete!")
506
+
507
+
508
# Run the interactive demo only when executed directly as a script.
if __name__ == "__main__":
    # BUG FIX: the original line was `demo_pdf_reader` (no parentheses),
    # which merely evaluates the function object and discards it — the
    # demo never ran. Add the call.
    demo_pdf_reader()