AyobamiMichael committed on
Commit
bcf0130
·
verified ·
1 Parent(s): fdf8198

Upload 16 files

Browse files
agents/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents package
3
+ Specialized AI agents for the multi-agent system
4
+ """
5
+
6
+ from .analyst_agent import AnalystAgent
7
+ from .evaluator_agent import EvaluatorAgent
8
+ from .innovator_agent import InnovatorAgent
9
+ from .writer_agent import WriterAgent
10
+
11
+ __all__ = ['AnalystAgent', 'EvaluatorAgent', 'InnovatorAgent', 'WriterAgent']
agents/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (477 Bytes). View file
 
agents/__pycache__/analyst_agent.cpython-310.pyc ADDED
Binary file (8.34 kB). View file
 
agents/__pycache__/evaluator_agent.cpython-310.pyc ADDED
Binary file (10.5 kB). View file
 
agents/__pycache__/innovator_agent.cpython-310.pyc ADDED
Binary file (12.8 kB). View file
 
agents/__pycache__/writer_agent.cpython-310.pyc ADDED
Binary file (16.6 kB). View file
 
agents/analyst_agent.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents/analyst_agent.py
3
+ Extract and analyze research paper content
4
+ """
5
+
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path
10
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from demo_phase1 import BaseAgent, Message, MessageType
13
+ from typing import Dict, Any
14
+ import json
15
+
16
+
17
class AnalystAgent(BaseAgent):
    """
    Analyst Agent - Paper Analysis & Information Extraction

    Role: Extract structured information from research papers
    Personality: Precise, detail-oriented, technical

    Capabilities:
    - Extract paper metadata (title, authors, year)
    - Identify key contributions
    - Analyze methodology
    - Assess novelty
    - Identify gaps and unclear sections
    """

    def __init__(self, message_queue, llm, pdf_reader):
        """
        Args:
            message_queue: Shared queue used by BaseAgent for inter-agent messaging.
            llm: LLM wrapper exposing generate() and generate_structured().
            pdf_reader: PDF helper exposing get_paper_info() and extract_text().
        """
        super().__init__(
            name="analyst",
            role="Paper Analysis & Information Extraction",
            message_queue=message_queue
        )
        self.llm = llm
        self.pdf_reader = pdf_reader

    def process(self, message: Message) -> Dict[str, Any]:
        """
        Process analysis request

        Expected message content:
        {
            'action': 'analyze',
            'paper_path': 'path/to/paper.pdf'
        }

        Returns:
        {
            'title': '...',
            'authors': [...],
            'key_contributions': [...],
            'methodology': {...},
            'results': {...},
            'novelty_assessment': {...},
            'gaps_identified': [...]
        }

        On failure a dict containing an 'error' key is returned instead of raising.
        """
        action = message.content.get('action')

        # This agent only understands 'analyze' requests.
        if action != 'analyze':
            return {'error': f'Unknown action: {action}'}

        paper_path = message.content.get('paper_path')

        if not paper_path:
            return {'error': 'No paper_path provided'}

        print(f"📄 Analyst: Processing paper: {paper_path}")

        try:
            # Extract text from PDF
            paper_info = self.pdf_reader.get_paper_info(paper_path)
            full_text = self.pdf_reader.extract_text(paper_path)

            print(f"✅ Analyst: Extracted {len(full_text)} characters")

            # Analyze paper (only the beginning of the text is sent to keep
            # the LLM prompt within a manageable size).
            analysis = self._analyze_paper(
                full_text=full_text[:10000],  # First 10K chars
                abstract=paper_info.get('abstract', ''),
                metadata=paper_info.get('metadata', {})
            )

            print(f"✅ Analyst: Analysis complete")

            return analysis

        except Exception as e:
            # Errors (bad path, PDF parse failure, LLM failure) are reported
            # to the caller as a result dict rather than raised.
            print(f"❌ Analyst error: {e}")
            return {'error': str(e)}

    def _analyze_paper(
        self,
        full_text: str,
        abstract: str,
        metadata: Dict
    ) -> Dict[str, Any]:
        """Use LLM to extract structured information.

        Builds an extraction prompt from the paper text plus whatever metadata
        is available, asks the LLM for output matching ``schema``, and falls
        back to a zero-filled result dict (with an 'error' key) if the LLM
        call fails.
        """

        print("🧠 Analyst: Calling LLM for analysis...")

        # Build analysis prompt
        prompt = f"""Analyze this research paper and extract key information.

Paper Metadata:
- Title: {metadata.get('title', 'Not found')}
- Author: {metadata.get('author', 'Not found')}
- Pages: {metadata.get('num_pages', 'Unknown')}

Abstract:
{abstract if abstract else 'Abstract not extracted'}

Paper Text (first part):
{full_text}

Extract the following information:
1. **Title**: The paper's title (if not in metadata, extract from text)
2. **Authors**: List of author names
3. **Year**: Publication year if mentioned
4. **Venue**: Conference or journal name if mentioned
5. **Key Contributions**: 3-5 main contributions of this paper
6. **Methodology**: Brief description of the approach/method used
7. **Datasets**: What datasets were used (if any)
8. **Evaluation Metrics**: Metrics used to evaluate (if mentioned)
9. **Main Results**: Key findings or performance improvements
10. **Limitations**: Any limitations mentioned by authors
11. **Novelty Score**: Rate the novelty from 0-10 with brief reasoning
12. **Gaps**: Any unclear sections or missing information

Be precise and extract only information clearly stated in the paper."""

        # Define expected schema (informal: values describe the expected
        # types; interpretation is up to llm.generate_structured).
        schema = {
            "title": "string",
            "authors": ["string"],
            "year": "number or null",
            "venue": "string or null",
            "key_contributions": ["string"],
            "methodology": {
                "approach": "string",
                "datasets": ["string"],
                "evaluation_metrics": ["string"]
            },
            "main_results": {
                "summary": "string",
                "performance_improvements": ["string"]
            },
            "limitations": ["string"],
            "novelty_assessment": {
                "score": "number (0-10)",
                "reasoning": "string"
            },
            "gaps_identified": ["string"]
        }

        # Call LLM with structured output
        try:
            analysis = self.llm.generate_structured(
                prompt=prompt,
                schema=schema,
                max_tokens=2000,
                temperature=0.3  # Lower for more precise extraction
            )

            print(f"✅ Analyst: LLM analysis successful")

            # Add metadata describing what was actually fed to the LLM,
            # useful for debugging poor extractions.
            analysis['extraction_metadata'] = {
                'source': metadata.get('title', 'Unknown'),
                'pages': metadata.get('num_pages', 0),
                'text_length': len(full_text),
                'abstract_available': bool(abstract)
            }

            return analysis

        except Exception as e:
            print(f"❌ Analyst LLM error: {e}")

            # Return fallback analysis with the same shape as a successful
            # result so downstream agents can still consume it.
            return {
                'title': metadata.get('title', 'Unknown'),
                'authors': [metadata.get('author', 'Unknown')],
                'year': None,
                'venue': None,
                'key_contributions': ['Could not extract - LLM error'],
                'methodology': {
                    'approach': 'Could not extract',
                    'datasets': [],
                    'evaluation_metrics': []
                },
                'main_results': {
                    'summary': 'Could not extract',
                    'performance_improvements': []
                },
                'limitations': [],
                'novelty_assessment': {
                    'score': 0,
                    'reasoning': f'Analysis failed: {str(e)}'
                },
                'gaps_identified': [f'LLM analysis error: {str(e)}'],
                'error': str(e)
            }

    def quick_summary(self, paper_path: str) -> str:
        """Generate a quick one-paragraph summary.

        Reads up to the first 5000 characters of the paper and asks the LLM
        for a 3-5 sentence summary. Returns an error message string (never
        raises) on failure.
        """

        print(f"📝 Analyst: Generating quick summary for {paper_path}")

        try:
            paper_info = self.pdf_reader.get_paper_info(paper_path)
            text_sample = self.pdf_reader.extract_text(paper_path)[:5000]

            prompt = f"""Provide a concise one-paragraph summary of this research paper.

Title: {paper_info.get('metadata', {}).get('title', 'Unknown')}

Text:
{text_sample}


Summary (1 paragraph, 3-5 sentences):"""

            summary = self.llm.generate(
                prompt=prompt,
                max_tokens=200,
                temperature=0.5
            )

            return summary.strip()

        except Exception as e:
            return f"Could not generate summary: {str(e)}"

    def identify_research_gaps(self, analysis: Dict[str, Any]) -> list:
        """Identify potential research gaps based on analysis.

        Args:
            analysis: A result dict produced by process()/_analyze_paper().

        Returns:
            A list of non-empty response lines from the LLM (NOTE: the raw
            split may include heading lines, not only the gap items), or a
            single-element error list on failure.
        """

        print("🔍 Analyst: Identifying research gaps...")

        prompt = f"""Based on this paper analysis, identify 3-5 potential research gaps or future directions:

Key Contributions:
{json.dumps(analysis.get('key_contributions', []), indent=2)}

Methodology:
{json.dumps(analysis.get('methodology', {}), indent=2)}

Limitations:
{json.dumps(analysis.get('limitations', []), indent=2)}

Identify:
1. What questions remain unanswered?
2. What extensions could be explored?
3. What weaknesses could be addressed?
4. What new applications could be investigated?

Provide 3-5 concrete research gaps."""

        try:
            response = self.llm.generate(
                prompt=prompt,
                max_tokens=500,
                temperature=0.7
            )

            # Parse into list (one entry per non-blank line)
            gaps = [line.strip() for line in response.split('\n') if line.strip()]
            return gaps

        except Exception as e:
            print(f"❌ Gap identification error: {e}")
            return ["Could not identify gaps due to error"]
280
+
281
+
282
+ # ==================== DEMO ====================
283
+
284
def demo_analyst():
    """Interactive demonstration of the Analyst Agent.

    Wires the agent to live LLM/PDF dependencies, then (optionally)
    analyzes a user-supplied PDF and prints the structured result.
    """

    sep = "=" * 60
    print(sep)
    print("📊 ANALYST AGENT DEMO")
    print(sep)
    print()

    # Wire up the concrete dependencies the agent needs.
    from llm_wrapper import LLMWrapper
    from pdf_reader import PDFReader
    from demo_phase1 import MessageQueue

    language_model = LLMWrapper(model='fast')
    reader = PDFReader()
    bus = MessageQueue()

    # Build the agent under test.
    agent = AnalystAgent(bus, language_model, reader)

    print(f"✅ {agent.name} initialized")
    print(f" Role: {agent.role}")
    print()

    # Let the user point at a paper; an empty answer skips the live run.
    print("📄 To test, provide path to a PDF research paper:")
    pdf_path = input("Enter path (or press Enter to skip): ").strip()

    if not pdf_path:
        print("⏭️ Skipping test (no paper provided)")
    else:
        # Package the request exactly as an upstream agent would.
        request = Message(
            sender="tester",
            recipient="analyst",
            message_type=MessageType.REQUEST,
            content={
                'action': 'analyze',
                'paper_path': pdf_path
            }
        )

        # Run the analysis end to end.
        analysis = agent.process(request)

        print("\n" + sep)
        print("📊 ANALYSIS RESULT")
        print(sep)
        print(json.dumps(analysis, indent=2))

    print("\n✅ Demo complete!")
336
+
337
+
338
+ if __name__ == "__main__":
339
+ demo_analyst()
agents/evaluator_agent.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents/evaluator_agent.py
3
+ Assess paper quality and funding potential
4
+ """
5
+
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path
10
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
+ from demo_phase1 import BaseAgent, Message, MessageType
13
+ from typing import Dict, Any
14
+ import json
15
+
16
+
17
class EvaluatorAgent(BaseAgent):
    """
    Evaluator Agent - Paper Quality Assessment & Review

    Role: Assess paper quality and impact potential
    Personality: Critical but fair, peer-review style

    Capabilities:
    - Score originality, methodology, and impact
    - Identify weaknesses in approach
    - Assess funding potential
    - Generate reviewer-style feedback
    - Compare to state-of-the-art
    """

    def __init__(self, message_queue, llm):
        """
        Args:
            message_queue: Shared queue used by BaseAgent for inter-agent messaging.
            llm: LLM wrapper exposing generate() and generate_structured().
        """
        super().__init__(
            name="evaluator",
            role="Paper Quality Assessment & Review",
            message_queue=message_queue
        )
        self.llm = llm

    def process(self, message: Message) -> Dict[str, Any]:
        """
        Process evaluation request

        Expected message content:
        {
            'action': 'evaluate',
            'analysis': {... analyst output ...}
        }

        Returns:
        {
            'scores': {
                'originality': 0-10,
                'methodology': 0-10,
                'impact': 0-10,
                'clarity': 0-10,
                'overall': 0-10
            },
            'funding_potential': 'HIGH' | 'MEDIUM' | 'LOW',
            'strengths': [...],
            'weaknesses': [...],
            'reviewer_feedback': [...],
            'recommendations': {...}
        }

        On failure a dict containing an 'error' key is returned instead of raising.
        """
        action = message.content.get('action')

        # This agent only understands 'evaluate' requests.
        if action != 'evaluate':
            return {'error': f'Unknown action: {action}'}

        analysis = message.content.get('analysis')

        if not analysis:
            return {'error': 'No analysis provided'}

        print(f"⚖️ Evaluator: Assessing paper quality...")

        try:
            # Evaluate the paper
            evaluation = self._evaluate_paper(analysis)

            print(f"✅ Evaluator: Evaluation complete")
            print(f" Overall Score: {evaluation.get('scores', {}).get('overall', 0)}/10")
            print(f" Funding Potential: {evaluation.get('funding_potential', 'UNKNOWN')}")

            return evaluation

        except Exception as e:
            # Errors are reported to the caller rather than raised.
            print(f"❌ Evaluator error: {e}")
            return {'error': str(e)}

    def _evaluate_paper(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Use LLM to evaluate paper quality.

        Prompts the LLM for a peer-review-style assessment matching
        ``schema``, clamps any out-of-range scores to [0, 10], and falls
        back to a zero-scored result dict (with an 'error' key) if the
        LLM call or score validation fails.
        """

        print("🧠 Evaluator: Calling LLM for evaluation...")

        # Build evaluation prompt
        prompt = f"""You are a peer reviewer evaluating this research paper. Provide a thorough assessment.

PAPER ANALYSIS:
{json.dumps(analysis, indent=2)}

Evaluate the paper on these dimensions:

1. **Originality** (0-10): How novel is this work?
   - Are the ideas new?
   - Does it advance the field?
   - Is it incremental or groundbreaking?

2. **Methodology** (0-10): How sound is the approach?
   - Is the method well-designed?
   - Are experiments rigorous?
   - Are datasets appropriate?
   - Are comparisons fair?

3. **Impact** (0-10): What is the potential impact?
   - Will this influence future research?
   - Are there practical applications?
   - Is it significant for the community?

4. **Clarity** (0-10): How well is it presented?
   - Is the writing clear?
   - Are results well-explained?
   - Is it reproducible?

5. **Overall** (0-10): Overall quality assessment

Additionally provide:
- **Funding Potential**: HIGH / MEDIUM / LOW (would this get funded?)
- **Strengths**: 3-5 key strengths
- **Weaknesses**: 3-5 key weaknesses or concerns
- **Reviewer Feedback**: 3-5 critical comments (peer-review style)
- **Recommendations**: What needs improvement for acceptance/funding?

Be critical but constructive. Think like a senior researcher reviewing for a top conference."""

        # Define expected schema (informal: values describe the expected types)
        schema = {
            "scores": {
                "originality": "number (0-10)",
                "methodology": "number (0-10)",
                "impact": "number (0-10)",
                "clarity": "number (0-10)",
                "overall": "number (0-10)"
            },
            "funding_potential": "HIGH | MEDIUM | LOW",
            "strengths": ["string"],
            "weaknesses": ["string"],
            "reviewer_feedback": ["string"],
            "recommendations": {
                "for_publication": ["string"],
                "for_funding": ["string"],
                "future_work": ["string"]
            },
            "decision_reasoning": "string"
        }

        # Call LLM
        try:
            evaluation = self.llm.generate_structured(
                prompt=prompt,
                schema=schema,
                max_tokens=2000,
                temperature=0.4  # Balanced for thoughtful evaluation
            )

            print(f"✅ Evaluator: LLM evaluation successful")

            # Validate scores are in range; clamp anything the LLM returned
            # outside [0, 10]. A malformed response (missing 'scores') will
            # raise here and be handled by the fallback below.
            for score_name, score in evaluation['scores'].items():
                if not (0 <= score <= 10):
                    print(f"⚠️ Warning: {score_name} score out of range: {score}")
                    evaluation['scores'][score_name] = max(0, min(10, score))

            return evaluation

        except Exception as e:
            print(f"❌ Evaluator LLM error: {e}")

            # Return fallback evaluation with the same shape as a successful
            # result so downstream consumers can still use it.
            return {
                'scores': {
                    'originality': 0,
                    'methodology': 0,
                    'impact': 0,
                    'clarity': 0,
                    'overall': 0
                },
                'funding_potential': 'UNKNOWN',
                'strengths': [],
                'weaknesses': [f'Evaluation failed: {str(e)}'],
                'reviewer_feedback': [f'Could not complete evaluation: {str(e)}'],
                'recommendations': {
                    'for_publication': [],
                    'for_funding': [],
                    'future_work': []
                },
                'decision_reasoning': f'Evaluation error: {str(e)}',
                'error': str(e)
            }

    def compare_to_baseline(
        self,
        analysis: Dict[str, Any],
        baseline_description: str
    ) -> Dict[str, Any]:
        """Compare paper to a baseline or state-of-the-art.

        Args:
            analysis: Analyst result dict (only 'main_results' is used).
            baseline_description: Free-text description of the baseline.

        Returns:
            {'comparison_summary': ..., 'baseline': ...} on success, or a
            dict with an 'error' key on failure.
        """

        print("📊 Evaluator: Comparing to baseline...")

        prompt = f"""Compare this paper to the baseline/state-of-the-art:

PAPER RESULTS:
{json.dumps(analysis.get('main_results', {}), indent=2)}

BASELINE:
{baseline_description}

Provide comparison:
1. How does this paper improve over baseline?
2. What are the performance gains?
3. Is the comparison fair?
4. What are the limitations of the comparison?

Be specific about quantitative improvements if mentioned."""

        try:
            comparison = self.llm.generate(
                prompt=prompt,
                max_tokens=500,
                temperature=0.5
            )

            return {
                'comparison_summary': comparison,
                'baseline': baseline_description
            }

        except Exception as e:
            return {
                'comparison_summary': f'Comparison failed: {str(e)}',
                'error': str(e)
            }

    def assess_reproducibility(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Assess how reproducible the work is.

        Only the 'methodology' portion of the analysis is considered.
        Returns a schema-shaped dict; on failure the score is 0 and the
        'error' key is set.
        """

        print("🔬 Evaluator: Assessing reproducibility...")

        methodology = analysis.get('methodology', {})

        prompt = f"""Assess the reproducibility of this research:

METHODOLOGY:
{json.dumps(methodology, indent=2)}

DATASETS: {methodology.get('datasets', [])}
EVALUATION METRICS: {methodology.get('evaluation_metrics', [])}

Rate reproducibility (0-10) and identify:
1. What information is provided?
2. What is missing for reproduction?
3. Are code/data available? (if mentioned)
4. Can someone else replicate this?

Provide:
- reproducibility_score (0-10)
- available_resources (list)
- missing_information (list)
- reproducibility_notes (string)"""

        schema = {
            "reproducibility_score": "number (0-10)",
            "available_resources": ["string"],
            "missing_information": ["string"],
            "reproducibility_notes": "string"
        }

        try:
            assessment = self.llm.generate_structured(
                prompt=prompt,
                schema=schema,
                temperature=0.3
            )

            return assessment

        except Exception as e:
            return {
                'reproducibility_score': 0,
                'available_resources': [],
                'missing_information': [f'Assessment error: {str(e)}'],
                'reproducibility_notes': 'Could not assess',
                'error': str(e)
            }

    def generate_review_summary(self, evaluation: Dict[str, Any]) -> str:
        """Generate a concise, human-readable review summary.

        Purely formats an evaluation dict (as returned by process());
        performs no LLM calls and never raises on missing keys thanks to
        the .get() defaults.
        """

        scores = evaluation.get('scores', {})
        funding = evaluation.get('funding_potential', 'UNKNOWN')

        summary = f"""REVIEW SUMMARY
{"="*50}

Overall Score: {scores.get('overall', 0)}/10
Funding Potential: {funding}

Scores:
- Originality: {scores.get('originality', 0)}/10
- Methodology: {scores.get('methodology', 0)}/10
- Impact: {scores.get('impact', 0)}/10
- Clarity: {scores.get('clarity', 0)}/10

STRENGTHS:
"""

        # Numbered lists for strengths and weaknesses.
        for i, strength in enumerate(evaluation.get('strengths', []), 1):
            summary += f"{i}. {strength}\n"

        summary += "\nWEAKNESSES:\n"
        for i, weakness in enumerate(evaluation.get('weaknesses', []), 1):
            summary += f"{i}. {weakness}\n"

        summary += f"\nDECISION: {evaluation.get('decision_reasoning', 'N/A')}"

        return summary
328
+
329
+
330
+ # ==================== DEMO ====================
331
+
332
def demo_evaluator():
    """Interactive demonstration of the Evaluator Agent.

    Feeds a canned analyst result (the Transformer paper) through the
    evaluator and prints both the formatted summary and the raw JSON.
    """

    sep = "=" * 60
    print(sep)
    print("⚖️ EVALUATOR AGENT DEMO")
    print(sep)
    print()

    # Wire up the concrete dependencies the agent needs.
    from llm_wrapper import LLMWrapper
    from demo_phase1 import MessageQueue

    language_model = LLMWrapper(model='fast')
    bus = MessageQueue()

    # Build the agent under test.
    reviewer = EvaluatorAgent(bus, language_model)

    print(f"✅ {reviewer.name} initialized")
    print(f" Role: {reviewer.role}")
    print()

    # Canned analyst output describing the Transformer paper.
    sample_analysis = {
        'title': 'Attention Is All You Need',
        'authors': ['Vaswani et al.'],
        'key_contributions': [
            'Introduced Transformer architecture',
            'Replaced RNNs with self-attention',
            'Achieved state-of-the-art on translation'
        ],
        'methodology': {
            'approach': 'Transformer neural network with multi-head attention',
            'datasets': ['WMT 2014 English-German', 'WMT 2014 English-French'],
            'evaluation_metrics': ['BLEU score', 'Training time']
        },
        'main_results': {
            'summary': 'Best BLEU score on translation tasks',
            'performance_improvements': [
                '28.4 BLEU on WMT 2014 English-German',
                'Trained in fraction of time vs RNN models'
            ]
        },
        'limitations': [
            'Memory intensive for very long sequences',
            'Less interpretable than RNNs'
        ],
        'novelty_assessment': {
            'score': 9,
            'reasoning': 'Revolutionary architecture that changed NLP'
        }
    }

    # Package the request exactly as the analyst would send it.
    request = Message(
        sender="analyst",
        recipient="evaluator",
        message_type=MessageType.REQUEST,
        content={
            'action': 'evaluate',
            'analysis': sample_analysis
        }
    )

    print("🧪 Testing with mock Transformer paper analysis...")
    print()

    # Run the evaluation end to end.
    outcome = reviewer.process(request)

    print("\n" + sep)
    print("⚖️ EVALUATION RESULT")
    print(sep)

    # Show summary (or the error if the evaluation failed).
    if 'error' in outcome:
        print(f"❌ Error: {outcome['error']}")
    else:
        print(reviewer.generate_review_summary(outcome))

    print("\n" + sep)
    print("📋 FULL EVALUATION (JSON)")
    print(sep)
    print(json.dumps(outcome, indent=2))

    print("\n✅ Demo complete!")
420
+
421
+
422
+ if __name__ == "__main__":
423
+ demo_evaluator()
agents/innovator_agent.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ agents/innovator_agent.py
4
+ Generate novel research directions and extensions
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+
11
+ from demo_phase1 import BaseAgent, Message, MessageType
12
+ from typing import Dict, Any
13
+ import json
14
+
15
+ class InnovatorAgent(BaseAgent):
16
+ """
17
+ Innovator Agent - Creative Research Extension
18
+
19
+ Role: Identify future directions & applications
20
+ Personality: Visionary, creative, forward-thinking
21
+
22
+ Capabilities:
23
+ - Generate 3-5 novel research directions
24
+ - Identify potential industry applications
25
+ - Propose extensions to current work
26
+ - Suggest cross-disciplinary connections
27
+ - Create "what if" scenarios
28
+ - Assess commercial potential
29
+ """
30
+
31
+ def __init__(self, message_queue, llm):
32
+ super().__init__(
33
+ name="innovator",
34
+ role="Creative Research Extension & Future Directions",
35
+ message_queue=message_queue
36
+ )
37
+ self.llm = llm
38
+
39
+ def process(self, message: Message) -> Dict[str, Any]:
40
+ """
41
+ Process innovation request
42
+
43
+ Expected message content:
44
+ {
45
+ 'action': 'innovate',
46
+ 'analysis': {...}, # From analyst
47
+ 'evaluation': {...} # From evaluator
48
+ }
49
+
50
+ Returns:
51
+ {
52
+ 'future_directions': [...],
53
+ 'industry_applications': [...],
54
+ 'extensions': [...],
55
+ 'cross_disciplinary': [...],
56
+ 'commercial_potential': 'HIGH' | 'MEDIUM' | 'LOW',
57
+ 'ten_year_vision': '...',
58
+ 'breakthrough_potential': {...}
59
+ }
60
+ """
61
+ action = message.content.get('action')
62
+
63
+ if action != 'innovate':
64
+ return {'error': f'Unknown action: {action}'}
65
+
66
+ analysis = message.content.get('analysis')
67
+ evaluation = message.content.get('evaluation')
68
+
69
+ if not analysis:
70
+ return {'error': 'No analysis provided'}
71
+
72
+ print(f"💡 Innovator: Generating future directions...")
73
+
74
+ try:
75
+ # Generate innovations
76
+ innovations = self._generate_innovations(analysis, evaluation)
77
+
78
+ print(f"✅ Innovator: Generated {len(innovations.get('future_directions', []))} future directions")
79
+ print(f" Commercial Potential: {innovations.get('commercial_potential', 'N/A')}")
80
+
81
+ return innovations
82
+
83
+ except Exception as e:
84
+ print(f"❌ Innovator error: {e}")
85
+ return {'error': str(e)}
86
+
87
+ def _generate_innovations(
88
+ self,
89
+ analysis: Dict[str, Any],
90
+ evaluation: Dict[str, Any] = None
91
+ ) -> Dict[str, Any]:
92
+ """Use LLM to generate innovative directions"""
93
+
94
+ print("🧠 Innovator: Calling LLM for creative ideation...")
95
+
96
+ # Build innovation prompt
97
+ prompt = f"""You are a visionary research innovator. Based on this paper analysis, generate creative future directions.
98
+
99
+ PAPER ANALYSIS:
100
+ Title: {analysis.get('title', 'Unknown')}
101
+ Key Contributions: {json.dumps(analysis.get('key_contributions', []), indent=2)}
102
+ Methodology: {json.dumps(analysis.get('methodology', {}), indent=2)}
103
+ Results: {json.dumps(analysis.get('main_results', {}), indent=2)}
104
+ Limitations: {json.dumps(analysis.get('limitations', []), indent=2)}
105
+ Gaps: {json.dumps(analysis.get('gaps_identified', []), indent=2)}
106
+
107
+ Generate innovative extensions and directions:
108
+
109
+
110
+ 1. **Future Research Directions** (3-5 specific directions):
111
+ - What are the most promising unexplored areas?
112
+ - What novel variations could be investigated?
113
+ - What fundamental questions remain?
114
+
115
+ 2. **Industry Applications** (3-5 real-world applications):
116
+ - Healthcare, finance, education, manufacturing, etc.
117
+ - Specific use cases with clear value
118
+ - Near-term vs long-term opportunities
119
+
120
+ 3. **Novel Extensions** (3-5 technical extensions):
121
+ - Algorithmic improvements
122
+ - New architectures or approaches
123
+ - Combining with other techniques
124
+ - Scaling to new domains
125
+
126
+ 4. **Cross-Disciplinary Connections** (2-4 connections):
127
+ - How could this intersect with biology, physics, social science, etc.?
128
+ - Unexpected applications in other fields
129
+ - Potential for interdisciplinary breakthroughs
130
+
131
+ 5. **Commercial Potential**: HIGH / MEDIUM / LOW
132
+ - Can this be monetized?
133
+ - Market size and demand
134
+ - Competitive advantages
135
+
136
+ 6. **10-Year Vision**:
137
+ - Where could this research lead in a decade?
138
+ - Transformative potential
139
+ - Societal impact
140
+
141
+ 7. **Breakthrough Potential**:
142
+ - Could this lead to major breakthroughs?
143
+ - Nobel Prize potential? (be honest)
144
+ - Paradigm-shifting capability
145
+
146
+ Be creative, ambitious, and forward-thinking. Think like a visionary researcher who sees beyond current limitations."""
147
+
148
+ # Define expected schema
149
+ schema = {
150
+ "future_directions": [
151
+ {
152
+ "direction": "string (title)",
153
+ "description": "string (2-3 sentences)",
154
+ "feasibility": "HIGH | MEDIUM | LOW",
155
+ "timeframe": "string (1-2 years, 3-5 years, 5-10 years)"
156
+ }
157
+ ],
158
+ "industry_applications": [
159
+ {
160
+ "domain": "string (industry/field)",
161
+ "application": "string (specific use case)",
162
+ "value_proposition": "string",
163
+ "readiness": "string (ready now, 1-2 years, 3-5 years)"
164
+ }
165
+ ],
166
+ "extensions": [
167
+ {
168
+ "extension": "string (title)",
169
+ "description": "string",
170
+ "technical_challenge": "string"
171
+ }
172
+ ],
173
+ "cross_disciplinary": [
174
+ {
175
+ "field": "string",
176
+ "connection": "string",
177
+ "potential": "string"
178
+ }
179
+ ],
180
+ "commercial_potential": "HIGH | MEDIUM | LOW",
181
+ "commercial_reasoning": "string",
182
+ "ten_year_vision": "string (paragraph)",
183
+ "breakthrough_potential": {
184
+ "score": "number (0-10)",
185
+ "reasoning": "string",
186
+ "paradigm_shift": "boolean"
187
+ }
188
+ }
189
+
190
+ # Call LLM
191
+ try:
192
+ innovations = self.llm.generate_structured(
193
+ prompt=prompt,
194
+ schema=schema,
195
+ max_tokens=3000,
196
+ temperature=0.8 # Higher for creativity
197
+ )
198
+
199
+ print(f"✅ Innovator: LLM ideation successful")
200
+
201
+ return innovations
202
+
203
+ except Exception as e:
204
+ print(f"❌ Innovator LLM error: {e}")
205
+
206
+ # Return fallback
207
+ return {
208
+ 'future_directions': [
209
+ {
210
+ 'direction': 'Could not generate',
211
+ 'description': f'Ideation failed: {str(e)}',
212
+ 'feasibility': 'UNKNOWN',
213
+ 'timeframe': 'Unknown'
214
+ }
215
+ ],
216
+ 'industry_applications': [],
217
+ 'extensions': [],
218
+ 'cross_disciplinary': [],
219
+ 'commercial_potential': 'UNKNOWN',
220
+ 'commercial_reasoning': f'Error: {str(e)}',
221
+ 'ten_year_vision': 'Could not generate vision',
222
+ 'breakthrough_potential': {
223
+ 'score': 0,
224
+ 'reasoning': f'Generation failed: {str(e)}',
225
+ 'paradigm_shift': False
226
+ },
227
+ 'error': str(e)
228
+ }
229
+ def generate_what_if_scenarios(self, analysis: Dict[str, Any]) -> list:
230
+ """Generate creative 'what if' scenarios"""
231
+
232
+ print("🔮 Innovator: Generating 'what if' scenarios...")
233
+
234
+ prompt = f"""Based on this research, generate 5 creative "what if" scenarios:
235
+
236
+ Research: {analysis.get('title', 'Unknown')}
237
+ Contributions: {json.dumps(analysis.get('key_contributions', []))}
238
+
239
+ Generate 5 "what if" scenarios exploring:
240
+ 1. What if this technique was 100x faster?
241
+ 2. What if it could handle 1000x more data?
242
+ 3. What if it was combined with [emerging technology]?
243
+ 4. What if the assumptions were changed?
244
+ 5. What if it was applied to [unexpected domain]?
245
+
246
+ Make them specific, creative, and thought-provoking."""
247
+
248
+
249
+ try:
250
+ response = self.llm.generate(
251
+ prompt=prompt,
252
+ max_tokens=800,
253
+ temperature=0.9 # Very creative
254
+ )
255
+
256
+ # Parse scenarios
257
+ scenarios = [s.strip() for s in response.split('\n') if s.strip()]
258
+ return scenarios[:5]
259
+
260
+ except Exception as e:
261
+ print(f"❌ Scenario generation error: {e}")
262
+ return ["Could not generate scenarios due to error"]
263
+
264
+ def assess_funding_opportunities(
265
+ self,
266
+ innovations: Dict[str, Any]
267
+ ) -> Dict[str, Any]:
268
+ """Identify potential funding opportunities"""
269
+
270
+ print("💰 Innovator: Identifying funding opportunities...")
271
+
272
+ prompt = f"""Based on these research innovations, identify funding opportunities:
273
+
274
+ INNOVATIONS:
275
+ {json.dumps(innovations, indent=2)}
276
+
277
+ Identify:
278
+ 1. **Relevant Funding Agencies**:
279
+ - NSF programs (specific)
280
+ - NIH if applicable
281
+ - DARPA if defense-related
282
+ - Private foundations
283
+ - Industry partnerships
284
+
285
+ 2. **Grant Types**:
286
+ - Small grants ($50K-$250K)
287
+ - Medium grants ($250K-$1M)
288
+ - Large grants ($1M+)
289
+
290
+ 3. **Best Fit Programs** (top 3):
291
+ - Program name
292
+ - Why it's a good fit
293
+ - Typical funding amount
294
+
295
+ 4. **Funding Timeline**:
296
+ - When to apply
297
+ - Competition level"""
298
+
299
+ schema = {
300
+ "funding_agencies": ["string"],
301
+ "grant_types": {
302
+ "small_grants": ["string"],
303
+ "medium_grants": ["string"],
304
+ "large_grants": ["string"]
305
+ },
306
+ "best_fit_programs": [
307
+ {
308
+ "program": "string",
309
+ "agency": "string",
310
+ "fit_reasoning": "string",
311
+ "typical_amount": "string"
312
+ }
313
+ ],
314
+ "recommended_timeline": "string"
315
+ }
316
+
317
+ try:
318
+ opportunities = self.llm.generate_structured(
319
+ prompt=prompt,
320
+ schema=schema,
321
+ max_tokens=1500,
322
+ temperature=0.5
323
+ )
324
+
325
+ return opportunities
326
+
327
+ except Exception as e:
328
+ return {
329
+ 'funding_agencies': [],
330
+ 'grant_types': {},
331
+ 'best_fit_programs': [],
332
+ 'recommended_timeline': 'Unknown',
333
+ 'error': str(e)
334
+ }
335
+
336
    def generate_collaboration_network(
        self,
        analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Suggest potential collaborators and interdisciplinary connections.

        Args:
            analysis: Analyst output; 'title' and 'methodology' are
                interpolated into the prompt.

        Returns:
            On success: {'collaboration_suggestions': <prose>, 'generated': True}.
            On failure: {'collaboration_suggestions': 'Could not generate',
                         'error': <message>} — callers can distinguish the two
            by the presence of the 'error' key.
        """

        print("🤝 Innovator: Mapping collaboration opportunities...")

        # Free-form prompt: unlike the structured ideation calls in this
        # agent, this one returns prose, so plain generate() (no schema)
        # is used below.
        prompt = f"""Based on this research, suggest collaboration opportunities:

Research: {analysis.get('title', 'Unknown')}
Field: Based on {json.dumps(analysis.get('methodology', {}))}

Suggest:
1. **Complementary Expertise Needed** (3-5):
   - What skills/knowledge would enhance this?
   - Specific expertise areas

2. **Potential Collaborator Types**:
   - Academic departments
   - Research labs
   - Industry partners
   - Government agencies

3. **Interdisciplinary Opportunities**:
   - Fields to connect with
   - Synergies and benefits

4. **International Collaboration**:
   - Countries/regions with relevant expertise
   - Global research networks"""

        try:
            response = self.llm.generate(
                prompt=prompt,
                max_tokens=1000,
                temperature=0.6
            )

            return {
                'collaboration_suggestions': response,
                'generated': True
            }

        except Exception as e:
            return {
                'collaboration_suggestions': 'Could not generate',
                'error': str(e)
            }
385
+
386
+ # ==================== DEMO ====================
387
+
388
def demo_innovator():
    """Demo the Innovator Agent.

    Runs the agent end-to-end against a hard-coded mock analysis of the
    Transformer paper, printing the generated innovations. Requires a valid
    GROQ_API_KEY (via tools.llm_wrapper.LLMWrapper) to actually call the LLM.
    """

    print("="*60)
    print("💡 INNOVATOR AGENT DEMO")
    print("="*60)
    print()

    # Initialize dependencies (imported locally so importing this module
    # does not require the LLM/queue machinery).
    from tools.llm_wrapper import LLMWrapper
    from demo_phase1 import MessageQueue

    llm = LLMWrapper(model='fast')
    queue = MessageQueue()

    # Create innovator agent
    innovator = InnovatorAgent(queue, llm)

    print(f"✅ {innovator.name} initialized")
    print(f" Role: {innovator.role}")
    print()

    # Mock analysis from previous agents — stands in for real Analyst output
    # so this demo runs without the full pipeline.
    mock_analysis = {
        'title': 'Attention Is All You Need',
        'key_contributions': [
            'Introduced Transformer architecture',
            'Eliminated recurrence with self-attention',
            'Achieved SOTA on translation'
        ],
        'methodology': {
            'approach': 'Multi-head self-attention',
            'datasets': ['WMT 2014'],
            'evaluation_metrics': ['BLEU']
        },
        'main_results': {
            'summary': 'Best translation performance',
            'performance_improvements': ['28.4 BLEU on EN-DE']
        },
        'limitations': [
            'O(n²) memory complexity',
            'Requires large datasets'
        ],
        'gaps_identified': [
            'Efficiency for long sequences',
            'Applications beyond NLP'
        ]
    }

    # Create test message mimicking what the supervisor would send.
    test_message = Message(
        sender="supervisor",
        recipient="innovator",
        message_type=MessageType.REQUEST,
        content={
            'action': 'innovate',
            'analysis': mock_analysis
        }
    )

    # Process
    print("🧪 Generating innovations for Transformer paper...")
    print()

    result = innovator.process(test_message)

    print("\n" + "="*60)
    print("💡 INNOVATION RESULTS")
    print("="*60)

    # Pretty-print a subset of the structured result; a result containing
    # an 'error' key signals that generation failed.
    if 'error' not in result:
        print(f"\n🚀 Future Directions ({len(result.get('future_directions', []))}):")
        for i, direction in enumerate(result.get('future_directions', [])[:3], 1):
            print(f"\n{i}. {direction.get('direction', 'N/A')}")
            print(f" {direction.get('description', 'N/A')}")
            print(f" Feasibility: {direction.get('feasibility', 'N/A')}")
            print(f" Timeframe: {direction.get('timeframe', 'N/A')}")

        print(f"\n🏭 Industry Applications ({len(result.get('industry_applications', []))}):")
        for i, app in enumerate(result.get('industry_applications', [])[:3], 1):
            print(f"\n{i}. {app.get('domain', 'N/A')}: {app.get('application', 'N/A')}")
            print(f" Value: {app.get('value_proposition', 'N/A')}")

        print(f"\n💰 Commercial Potential: {result.get('commercial_potential', 'N/A')}")
        print(f" {result.get('commercial_reasoning', 'N/A')}")

        print(f"\n🔮 10-Year Vision:")
        print(f" {result.get('ten_year_vision', 'N/A')}")

        breakthrough = result.get('breakthrough_potential', {})
        print(f"\n⭐ Breakthrough Potential: {breakthrough.get('score', 0)}/10")
        print(f" {breakthrough.get('reasoning', 'N/A')}")
        print(f" Paradigm Shift: {breakthrough.get('paradigm_shift', False)}")
    else:
        print(f"\n❌ Error: {result['error']}")

    print("\n" + "="*60)
    print("📋 FULL OUTPUT (JSON)")
    print("="*60)
    print(json.dumps(result, indent=2))

    print("\n✅ Demo complete!")
491
+
492
# Run the interactive demo only when executed as a script.
if __name__ == "__main__":
    demo_innovator()
agents/writer_agent.py ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agents/writer_agent.py
3
+ Synthesize all agent outputs into grant proposal
4
+ """
5
+
6
+ import sys
7
+ import os
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+
11
+ from demo_phase1 import BaseAgent, Message, MessageType
12
+ from typing import Dict, Any
13
+ import json
14
+ from datetime import datetime
15
+
16
class WriterAgent(BaseAgent):
    """
    Writer Agent - Grant Proposal Synthesis

    Role: Create final grant proposal document
    Personality: Eloquent, persuasive, policy-aware

    Capabilities:
    - Synthesize all agent outputs into coherent narrative
    - Write NSF/NIH-style grant proposal sections
    - Handle conflicts between agents
    - Generate executive summary
    - Create research plan and timeline
    - Write impact statement
    - Format professional proposal
    """

    def __init__(self, message_queue, llm):
        """
        Args:
            message_queue: Shared queue used by BaseAgent for routing.
            llm: LLM wrapper exposing generate(prompt, max_tokens, temperature).
        """
        super().__init__(
            name="writer",
            role="Grant Proposal Synthesis & Document Generation",
            message_queue=message_queue
        )
        self.llm = llm

    def process(self, message: Message) -> Dict[str, Any]:
        """
        Process writing request.

        Expected message content:
        {
            'action': 'write_proposal',
            'analysis': {...},      # From analyst
            'evaluation': {...},    # From evaluator
            'innovations': {...},   # From innovator
            'conflicts': [...]      # Optional: any conflicts
        }

        Returns:
        {
            'proposal': {per-section texts},
            'full_text': '...',     # Complete formatted proposal
            'word_count': int,
            'metadata': {...}
        }
        or {'error': '...'} when the action is unknown, required inputs are
        missing, or generation fails.
        """
        action = message.content.get('action')

        if action != 'write_proposal':
            return {'error': f'Unknown action: {action}'}

        analysis = message.content.get('analysis')
        evaluation = message.content.get('evaluation')
        innovations = message.content.get('innovations')
        conflicts = message.content.get('conflicts', [])

        # All three upstream agent outputs are required to synthesize.
        if not analysis or not evaluation or not innovations:
            return {'error': 'Missing required inputs (analysis, evaluation, or innovations)'}

        print(f"✍️ Writer: Synthesizing grant proposal...")

        try:
            # Generate proposal
            proposal = self._write_proposal(
                analysis=analysis,
                evaluation=evaluation,
                innovations=innovations,
                conflicts=conflicts
            )

            word_count = len(proposal.get('full_text', '').split())

            print(f"✅ Writer: Proposal complete ({word_count} words)")

            return proposal

        except Exception as e:
            print(f"❌ Writer error: {e}")
            return {'error': str(e)}

    def _write_proposal(
        self,
        analysis: Dict[str, Any],
        evaluation: Dict[str, Any],
        innovations: Dict[str, Any],
        conflicts: list
    ) -> Dict[str, Any]:
        """Generate the complete grant proposal (all sections + full text)."""

        print("🧠 Writer: Generating proposal sections...")

        # Generate each section
        sections = {}

        # 1. Executive Summary
        print(" 📝 Writing executive summary...")
        sections['executive_summary'] = self._write_executive_summary(
            analysis, evaluation, innovations
        )

        # 2. Project Description
        print(" 📝 Writing project description...")
        sections['project_description'] = self._write_project_description(
            analysis, evaluation
        )

        # 3. Research Plan
        print(" 📝 Writing research plan...")
        sections['research_plan'] = self._write_research_plan(
            analysis, innovations
        )

        # 4. Broader Impacts
        print(" 📝 Writing broader impacts...")
        sections['broader_impacts'] = self._write_broader_impacts(
            innovations
        )

        # 5. Budget Justification
        print(" 📝 Writing budget justification...")
        sections['budget_justification'] = self._write_budget_justification(
            innovations
        )

        # 6. Timeline
        print(" 📝 Creating timeline...")
        sections['timeline'] = self._create_timeline(innovations)

        # 7. References (placeholder)
        sections['references'] = self._create_references(analysis)

        # 8. Conflict resolution.
        # FIX: resolve conflicts BEFORE assembling the document. Previously
        # this ran after _assemble_proposal, so the resolution narrative was
        # stored in `sections` but never reached `full_text`, even though
        # metadata reported conflicts as resolved.
        if conflicts:
            sections['conflict_resolution'] = self._resolve_conflicts(conflicts)

        # Assemble full proposal (includes conflict resolution when present)
        full_text = self._assemble_proposal(sections, analysis)

        return {
            'proposal': sections,
            'full_text': full_text,
            'word_count': len(full_text.split()),
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'paper_title': analysis.get('title', 'Unknown'),
                'quality_score': evaluation.get('scores', {}).get('overall', 0),
                'funding_potential': evaluation.get('funding_potential', 'UNKNOWN'),
                'conflicts_resolved': len(conflicts)
            }
        }

    def _write_executive_summary(
        self,
        analysis: Dict,
        evaluation: Dict,
        innovations: Dict
    ) -> str:
        """Generate executive summary (1 page) via the LLM.

        Returns the summary text, or a bracketed error placeholder on failure
        so assembly can proceed without raising.
        """

        prompt = f"""Write a compelling 1-page executive summary for a grant proposal based on:

PAPER: {analysis.get('title', 'Unknown')}

KEY FINDINGS:
{json.dumps(analysis.get('key_contributions', []), indent=2)}

QUALITY ASSESSMENT:
Overall Score: {evaluation.get('scores', {}).get('overall', 0)}/10
Funding Potential: {evaluation.get('funding_potential', 'UNKNOWN')}

FUTURE DIRECTIONS:
{json.dumps([d.get('direction') for d in innovations.get('future_directions', [])], indent=2)}

Write an executive summary (250-300 words) that:
1. Opens with a compelling hook about the problem
2. Summarizes the key innovation
3. Highlights intellectual merit
4. Emphasizes broader impacts
5. States funding request (assume $500K over 3 years)
6. Ends with transformative potential

Use persuasive, professional grant-writing style. Make it exciting but credible."""

        try:
            summary = self.llm.generate(
                prompt=prompt,
                max_tokens=500,
                temperature=0.7
            )
            return summary.strip()
        except Exception as e:
            return f"[Executive Summary - Generation Error: {e}]"

    def _write_project_description(
        self,
        analysis: Dict,
        evaluation: Dict
    ) -> str:
        """Generate project description (2-3 pages) via the LLM."""

        prompt = f"""Write a detailed project description for a grant proposal:

PAPER ANALYSIS:
Title: {analysis.get('title')}
Contributions: {json.dumps(analysis.get('key_contributions', []))}
Methodology: {json.dumps(analysis.get('methodology', {}))}
Results: {json.dumps(analysis.get('main_results', {}))}

EVALUATION:
Strengths: {json.dumps(evaluation.get('strengths', []))}
Weaknesses: {json.dumps(evaluation.get('weaknesses', []))}

Write 3-4 paragraphs covering:
1. **Background & Motivation**: Why is this important?
2. **Current State**: What has been done (cite the paper)?
3. **Gap & Opportunity**: What's missing and why it matters
4. **Proposed Work**: What we will do to address the gap

Use clear, compelling academic writing. Be specific about technical details."""

        try:
            description = self.llm.generate(
                prompt=prompt,
                max_tokens=800,
                temperature=0.6
            )
            return description.strip()
        except Exception as e:
            return f"[Project Description - Generation Error: {e}]"

    def _write_research_plan(
        self,
        analysis: Dict,
        innovations: Dict
    ) -> str:
        """Generate research plan with specific aims (top 3 future directions)."""

        prompt = f"""Write a detailed research plan with specific aims:
CURRENT WORK:
{json.dumps(analysis.get('key_contributions', []))}

FUTURE DIRECTIONS:
{json.dumps([{
    'direction': d.get('direction'),
    'description': d.get('description'),
    'feasibility': d.get('feasibility')
} for d in innovations.get('future_directions', [])[:3]], indent=2)}

EXTENSIONS:
{json.dumps([e.get('extension') for e in innovations.get('extensions', [])], indent=2)}

Structure:

**Aim 1: [First Direction]**
- Rationale (why important)
- Approach (how we'll do it)
- Expected outcomes
- Potential challenges and mitigation

**Aim 2: [Second Direction]**
- (same structure)

**Aim 3: [Third Direction]**
- (same structure)

Write 2-3 paragraphs per aim. Be specific and technical."""

        try:
            plan = self.llm.generate(
                prompt=prompt,
                max_tokens=1200,
                temperature=0.6
            )
            return plan.strip()
        except Exception as e:
            return f"[Research Plan - Generation Error: {e}]"

    def _write_broader_impacts(self, innovations: Dict) -> str:
        """Generate broader impacts statement via the LLM."""

        prompt = f"""Write a compelling broader impacts statement:

APPLICATIONS:
{json.dumps([{
    'domain': a.get('domain'),
    'application': a.get('application'),
    'value': a.get('value_proposition')
} for a in innovations.get('industry_applications', [])], indent=2)}

COMMERCIAL POTENTIAL: {innovations.get('commercial_potential')}

VISION:
{innovations.get('ten_year_vision', '')}

Write 2-3 paragraphs covering:
1. **Societal Impact**: How will this benefit society?
2. **Educational Impact**: Training, outreach, diversity
3. **Economic Impact**: Jobs, innovation, competitiveness
4. **Global Impact**: International collaboration, sustainability

Be aspirational but realistic. Show transformative potential."""

        try:
            impacts = self.llm.generate(
                prompt=prompt,
                max_tokens=600,
                temperature=0.7
            )
            return impacts.strip()
        except Exception as e:
            return f"[Broader Impacts - Generation Error: {e}]"

    def _write_budget_justification(self, innovations: Dict) -> str:
        """Return the (static) budget justification.

        Note: `innovations` is currently unused — the budget is a fixed
        template, not LLM-generated.
        """

        # Simple template budget
        budget_template = """
BUDGET JUSTIFICATION (3-Year Project, $500,000 Total)

**Year 1: $180,000**
- Personnel: $120,000 (PI 1 month summer, 1 Postdoc, 1 PhD student)
- Equipment: $30,000 (GPU cluster, software licenses)
- Travel: $15,000 (Conference presentations, collaborations)
- Other: $15,000 (Cloud computing, datasets, publication fees)

**Year 2: $160,000**
- Personnel: $125,000 (Same team, cost-of-living adjustment)
- Equipment: $10,000 (Additional computing resources)
- Travel: $15,000 (Conferences, workshops)
- Other: $10,000 (Materials, services)

**Year 3: $160,000**
- Personnel: $130,000 (Same team structure)
- Travel: $20,000 (Final dissemination, collaborations)
- Other: $10,000 (Publication, open-source release)

**Justification:**
This budget supports a lean, focused team to achieve the proposed aims. The postdoc
will lead implementation, the PhD student will conduct experiments, and the PI will
provide strategic direction. Equipment costs are essential for computational research.
Travel enables dissemination and collaboration with key partners."""

        return budget_template.strip()

    def _create_timeline(self, innovations: Dict) -> Dict[str, list]:
        """Create the (static) 3-year project timeline.

        Note: `innovations` is currently unused — the timeline is a fixed
        quarter-by-quarter template.
        """

        timeline = {
            'Year 1': [
                'Q1: Literature review and baseline implementation',
                'Q2: Aim 1 - Initial experiments and data collection',
                'Q3: Aim 1 - Analysis and refinement',
                'Q4: Aim 2 - Begin second direction'
            ],
            'Year 2': [
                'Q1: Aim 2 - Core development',
                'Q2: Aim 2 - Testing and validation',
                'Q3: Aim 3 - Begin third direction',
                'Q4: Integration and cross-validation'
            ],
            'Year 3': [
                'Q1: Comprehensive evaluation',
                'Q2: Real-world deployment and testing',
                'Q3: Paper writing and submission',
                'Q4: Open-source release and dissemination'
            ]
        }

        return timeline

    def _create_references(self, analysis: Dict) -> str:
        """Create references section (placeholder built from the analysis)."""

        # In real implementation, would extract from PDF
        refs = f"""
REFERENCES

[1] {', '.join(analysis.get('authors', ['Unknown']))}. "{analysis.get('title', 'Unknown')}".
    {analysis.get('venue', 'Conference/Journal')}, {analysis.get('year', 'Year')}.

[2-10] Additional references would be extracted from the paper and added here...
"""
        return refs.strip()

    def _resolve_conflicts(self, conflicts: list) -> str:
        """Generate a short narrative explaining how agent disagreements were resolved."""

        if not conflicts:
            return "No conflicts to resolve."

        prompt = f"""These agents disagreed during analysis:

{json.dumps(conflicts, indent=2)}

Write a brief paragraph explaining:
1. What the disagreement was
2. How we resolved it (weighted expert opinions, additional analysis, etc.)
3. Why the final decision is sound

Be diplomatic and show that diverse perspectives strengthen the proposal."""

        try:
            resolution = self.llm.generate(
                prompt=prompt,
                max_tokens=300,
                temperature=0.6
            )
            return resolution.strip()
        except Exception as e:
            return f"[Conflict resolution failed: {e}]"

    def _assemble_proposal(self, sections: Dict, analysis: Dict) -> str:
        """Assemble all sections into one formatted plain-text proposal."""

        proposal = f"""
{'='*70}
GRANT PROPOSAL
{'='*70}

Title: Extension and Application of "{analysis.get('title', 'Unknown')}"
Principal Investigator: [PI Name]
Institution: [Institution]
Duration: 3 years
Requested Amount: $500,000

{'='*70}

EXECUTIVE SUMMARY
{'-'*70}
{sections.get('executive_summary', '[Missing]')}

{'='*70}

PROJECT DESCRIPTION
{'-'*70}
{sections.get('project_description', '[Missing]')}

{'='*70}

RESEARCH PLAN
{'-'*70}
{sections.get('research_plan', '[Missing]')}

{'='*70}

BROADER IMPACTS
{'-'*70}
{sections.get('broader_impacts', '[Missing]')}

{'='*70}

BUDGET JUSTIFICATION
{'-'*70}
{sections.get('budget_justification', '[Missing]')}

{'='*70}

PROJECT TIMELINE
{'-'*70}
"""

        # Add timeline
        timeline = sections.get('timeline', {})
        for year, quarters in timeline.items():
            proposal += f"\n{year}:\n"
            for quarter in quarters:
                proposal += f" • {quarter}\n"

        proposal += f"\n{'='*70}\n"
        proposal += f"\nREFERENCES\n{'-'*70}\n"
        proposal += sections.get('references', '[Missing]')

        # FIX: render the conflict-resolution narrative when present;
        # previously it was stored in `sections` but never emitted.
        if sections.get('conflict_resolution'):
            proposal += f"\n\n{'='*70}\n"
            proposal += f"\nCONFLICT RESOLUTION\n{'-'*70}\n"
            proposal += sections['conflict_resolution']

        proposal += f"\n\n{'='*70}\n"
        proposal += f"END OF PROPOSAL\n"
        proposal += f"{'='*70}\n"

        return proposal
502
+
503
+
504
+
505
+ # ==================== DEMO ====================
506
+
507
def demo_writer():
    """Demo the Writer Agent.

    Feeds hard-coded mock outputs from the analyst/evaluator/innovator agents
    into WriterAgent and prints the generated proposal. Interactive: offers to
    save the full proposal to disk. Requires a valid GROQ_API_KEY.
    """

    print("="*60)
    print("✍️ WRITER AGENT DEMO")
    print("="*60)
    print()

    # Initialize dependencies (imported locally so importing this module
    # does not require the LLM/queue machinery).
    from tools.llm_wrapper import LLMWrapper
    from demo_phase1 import MessageQueue

    llm = LLMWrapper(model='fast')
    queue = MessageQueue()

    # Create writer agent
    writer = WriterAgent(queue, llm)

    print(f"✅ {writer.name} initialized")
    print(f" Role: {writer.role}")
    print()

    # Mock inputs from other agents — stands in for the full pipeline.
    mock_data = {
        'analysis': {
            'title': 'Attention Is All You Need',
            'authors': ['Vaswani et al.'],
            'year': 2017,
            'venue': 'NeurIPS',
            'key_contributions': [
                'Introduced Transformer architecture',
                'Eliminated recurrence',
                'Achieved SOTA translation'
            ],
            'methodology': {
                'approach': 'Self-attention mechanism',
                'datasets': ['WMT 2014']
            },
            'main_results': {
                'summary': 'Best translation performance',
                'performance_improvements': ['28.4 BLEU']
            }
        },
        'evaluation': {
            'scores': {'overall': 9},
            'funding_potential': 'HIGH',
            'strengths': ['Novel architecture', 'Strong results'],
            'weaknesses': ['Memory complexity']
        },
        'innovations': {
            'future_directions': [
                {
                    'direction': 'Efficient attention mechanisms',
                    'description': 'Reduce O(n²) complexity',
                    'feasibility': 'HIGH'
                }
            ],
            'industry_applications': [
                {
                    'domain': 'Healthcare',
                    'application': 'Medical text analysis',
                    'value_proposition': 'Faster diagnosis'
                }
            ],
            'extensions': [{'extension': 'Sparse attention'}],
            'commercial_potential': 'HIGH',
            'ten_year_vision': 'Ubiquitous AI translation'
        }
    }

    # Create test message mimicking what the supervisor would send.
    test_message = Message(
        sender="supervisor",
        recipient="writer",
        message_type=MessageType.REQUEST,
        content={
            'action': 'write_proposal',
            **mock_data
        }
    )

    # Process
    print("🧪 Generating grant proposal...")
    print()

    result = writer.process(test_message)

    print("\n" + "="*60)
    print("✍️ PROPOSAL GENERATED")
    print("="*60)

    if 'error' not in result:
        print(f"\n📊 Metadata:")
        metadata = result.get('metadata', {})
        for key, value in metadata.items():
            print(f" {key}: {value}")

        print(f"\n📄 Proposal Preview (first 1000 chars):")
        print("-" * 60)
        full_text = result.get('full_text', '')
        print(full_text[:1000])
        print("...\n[truncated]\n")

        # Option to save
        save = input("Save full proposal to file? (y/n): ").strip().lower()
        if save == 'y':
            filename = "grant_proposal.txt"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(full_text)
            # FIX: the message previously printed the literal "(unknown)"
            # (an f-string with no placeholder) instead of the saved path.
            print(f"✅ Saved to {filename}")
    else:
        print(f"\n❌ Error: {result['error']}")

    print("\n✅ Demo complete!")
621
+
622
+
623
# Run the interactive demo only when executed as a script.
if __name__ == "__main__":
    demo_writer()
tools/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tools package
3
+ Utility tools for the multi-agent system
4
+ """
5
+
6
+ from .llm_wrapper import LLMWrapper, create_llm
7
+ from .pdf_reader import PDFReader
8
+
9
+ __all__ = ['LLMWrapper', 'create_llm', 'PDFReader']
tools/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (367 Bytes). View file
 
tools/__pycache__/llm_wrapper.cpython-310.pyc ADDED
Binary file (8.33 kB). View file
 
tools/__pycache__/pdf_reader.cpython-310.pyc ADDED
Binary file (11.4 kB). View file
 
tools/llm_wrapper.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tools/llm_wrapper.py
3
+ Unified LLM interface using Groq for fast inference
4
+ """
5
+
6
+
7
+ from groq import Groq
8
+ import json
9
+ import os
10
+ from typing import Dict, Any, Optional, List
11
+ from dotenv import load_dotenv
12
+ import time
13
+ import re
14
+
15
+
16
class LLMWrapper:
    """
    Unified interface for LLM operations using Groq.

    Wraps the Groq chat-completions API behind simple text, JSON and
    batch generation helpers, and keeps cumulative usage statistics
    (calls, tokens, errors) for the lifetime of the instance.
    """
    # Available Groq models (in order of preference).
    # NOTE(review): model IDs are hard-coded; Groq retires models over
    # time — confirm these are still served before deploying.
    MODELS = {
        'best': 'llama-3.1-70b-versatile',  # Highest quality
        'fast': 'llama-3.1-8b-instant',  # Fastest
        'reasoning': 'mixtral-8x7b-32768',  # Good for analysis
        'efficient': 'gemma-7b-it'  # Most efficient
    }

    def __init__(self, api_key: Optional[str] = None, model: str = 'best'):
        """
        Initialize Groq client.

        Args:
            api_key: Groq API key. If omitted, read from the GROQ_API_KEY
                environment variable (a .env file is loaded first via
                python-dotenv).
            model: Model preference key ('best', 'fast', 'reasoning',
                'efficient'). Unknown keys silently fall back to 'best'.

        Raises:
            ValueError: If no API key can be found.
        """
        # Load environment variables from a .env file, if present
        load_dotenv()

        # Get API key (explicit argument wins over the environment)
        self.api_key = api_key or os.getenv("GROQ_API_KEY")

        if not self.api_key:
            raise ValueError(
                "GROQ_API_KEY not found! "
                "Set it in .env file or pass as parameter. "
                "Get free key: https://console.groq.com/keys"
            )

        # Initialize client
        self.client = Groq(api_key=self.api_key)

        # Resolve the preference key to a concrete model id
        self.model = self.MODELS.get(model, self.MODELS['best'])

        # Stats tracking (cumulative over the instance lifetime)
        self.total_tokens = 0   # total tokens reported by the API
        self.total_calls = 0    # number of successful API calls
        self.total_errors = 0   # number of calls that raised

        print(f"✅ Groq LLM initialized with model: {self.model}")

    def generate(
        self,
        prompt: str,
        max_tokens: int = 1000,
        temperature: float = 0.7,  # default makes the output mildly creative
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a text response.

        Args:
            prompt: User prompt.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature (0.0-1.0); higher is more
                creative, lower more deterministic.
            system_prompt: Optional system instruction prepended to the
                conversation.

        Returns:
            Generated text (content of the first choice).

        Raises:
            Exception: Re-raises any Groq client error after counting it
                in ``total_errors``.
        """
        try:
            # Build messages in OpenAI-style chat format
            messages = []

            if system_prompt:
                messages.append({
                    "role": "system",
                    "content": system_prompt
                })

            messages.append({
                "role": "user",
                "content": prompt
            })

            # Call Groq API (timed only for the log line below)
            start_time = time.time()

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.95
            )

            elapsed = time.time() - start_time

            # Extract response text from the first choice
            result = response.choices[0].message.content

            # Update stats (only successful calls reach this point)
            self.total_tokens += response.usage.total_tokens
            self.total_calls += 1

            print(f"✅ LLM call completed in {elapsed:.2f}s ({response.usage.total_tokens} tokens)")

            return result

        except Exception as e:
            self.total_errors += 1
            print(f"❌ LLM error: {e}")
            raise

    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        max_tokens: int = 2000,
        temperature: float = 0.3
    ) -> Dict[str, Any]:
        """
        Generate a JSON response matching a schema.

        The schema is embedded in the prompt as guidance only — it is NOT
        enforced server-side, so the parsed result may still deviate from
        the requested shape.

        Args:
            prompt: User prompt.
            schema: Example-shaped dict shown to the model as the expected
                JSON schema.
            max_tokens: Maximum tokens.
            temperature: Lower for more deterministic JSON.

        Returns:
            Parsed JSON object.

        Raises:
            ValueError: If no parseable JSON can be extracted from the
                model's response.
        """
        # Add JSON instruction to prompt
        schema_str = json.dumps(schema, indent=2)

        full_prompt = f"""{prompt}

IMPORTANT: Respond with ONLY valid JSON matching this schema:
{schema_str}

Do not include any explanation or markdown formatting.

Return pure JSON that can be parsed directly."""

        system_prompt = "You are a precise JSON generator. Always return valid JSON with no additional text."

        # Generate response
        response_text = self.generate(
            prompt=full_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            system_prompt=system_prompt
        )

        # Parse JSON — try progressively more forgiving strategies
        try:
            # Try direct parse
            return json.loads(response_text)
        except json.JSONDecodeError:
            # Try to extract JSON from markdown code blocks
            json_match = re.search(r'```json\s*\n(.*?)\n```', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(1))

            # Try to find a JSON object anywhere in the text
            # (greedy: spans first '{' to last '}')
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))

            # If all fails, raise error
            raise ValueError(f"Could not parse JSON from response: {response_text[:200]}")

    def generate_with_retry(
        self,
        prompt: str,
        max_retries: int = 3,
        **kwargs
    ) -> str:
        """
        Generate with automatic retry on failure.

        Args:
            prompt: User prompt.
            max_retries: Maximum retry attempts.
            **kwargs: Additional arguments forwarded to generate().

        Returns:
            Generated text.

        Raises:
            Exception: The last error encountered if every attempt fails.
        """
        last_error = None

        for attempt in range(max_retries):
            try:
                return self.generate(prompt, **kwargs)
            except Exception as e:
                last_error = e
                print(f"⚠️ Attempt {attempt + 1} failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s, ...

        raise last_error

    def batch_generate(
        self,
        prompts: List[str],
        max_tokens: int = 1000,
        temperature: float = 0.7
    ) -> List[Optional[str]]:
        """
        Generate responses for multiple prompts sequentially.

        Args:
            prompts: List of prompts.
            max_tokens: Max tokens per response.
            temperature: Sampling temperature.

        Returns:
            List of responses, aligned with ``prompts``. A failed prompt
            yields ``None`` at its position rather than aborting the batch.
        """
        results = []

        for i, prompt in enumerate(prompts):
            print(f"Processing prompt {i + 1}/{len(prompts)}...")

            try:
                response = self.generate(
                    prompt=prompt,
                    max_tokens=max_tokens,
                    temperature=temperature
                )
                results.append(response)
            except Exception as e:
                # Best-effort: record the failure and keep going
                print(f"❌ Prompt {i + 1} failed: {e}")
                results.append(None)

            # Rate limiting (30 req/min = 2 sec between calls)
            if i < len(prompts) - 1:
                time.sleep(2)

        return results

    def count_tokens(self, text: str) -> int:
        """
        Estimate token count (rough approximation, not a real tokenizer).

        Args:
            text: Input text.

        Returns:
            Estimated token count.
        """
        # Rough estimate: 1 token ≈ 4 characters
        return len(text) // 4

    def get_stats(self) -> Dict[str, Any]:
        """Get cumulative usage statistics for this wrapper instance."""
        return {
            'total_calls': self.total_calls,
            'total_tokens': self.total_tokens,
            'total_errors': self.total_errors,
            'model': self.model,
            # max(..., 1) guards the division when no calls have been made
            'avg_tokens_per_call': self.total_tokens / max(self.total_calls, 1)
        }

    def reset_stats(self) -> None:
        """Reset usage statistics to zero (model/client are untouched)."""
        self.total_tokens = 0
        self.total_calls = 0
        self.total_errors = 0
287
+
288
+ # ==================== HELPER FUNCTIONS ====================
289
+
290
def create_llm(model: str = 'best', api_key: Optional[str] = None) -> LLMWrapper:
    """Factory helper for building an LLMWrapper.

    Thin convenience wrapper around the LLMWrapper constructor. Note the
    argument order here is (model, api_key) while the constructor takes
    (api_key, model), so arguments are forwarded by keyword.

    Args:
        model: Model preference key ('best', 'fast', 'reasoning', 'efficient').
        api_key: Optional Groq API key; when omitted the wrapper falls back
            to the GROQ_API_KEY environment variable.

    Returns:
        A ready-to-use LLMWrapper instance.
    """
    return LLMWrapper(model=model, api_key=api_key)
302
+
303
+
304
+ # ==================== DEMO & TESTING ====================
305
+
306
def demo_llm():
    """Interactive walkthrough of the LLMWrapper features.

    Performs three live Groq API calls (plain generation, schema-guided
    JSON, and a small batch) and finishes by printing the accumulated
    usage statistics. Purely a manual smoke test.
    """
    banner = "=" * 60
    divider = "-" * 60

    print(banner)
    print("🤖 GROQ LLM WRAPPER DEMO")
    print(banner)
    print()

    # The 'fast' model keeps the demo cheap and snappy
    wrapper = LLMWrapper(model='fast')

    # --- Test 1: plain text generation ---------------------------------
    print("\n📝 Test 1: Simple Text Generation")
    print(divider)
    reply = wrapper.generate(
        prompt="Explain what a research paper abstract is in one sentence.",
        max_tokens=100,
        temperature=0.7
    )
    print(f"Response: {reply}")

    # --- Test 2: schema-guided JSON output -----------------------------
    print("\n📊 Test 2: Structured JSON Output")
    print(divider)

    expected_shape = {
        "title": "string",
        "summary": "string",
        "key_points": ["string", "string", "string"]
    }

    structured = wrapper.generate_structured(
        prompt="Summarize what makes a good research paper in 3 key points.",
        schema=expected_shape,
        temperature=0.3
    )
    print(json.dumps(structured, indent=2))

    # --- Test 3: batch processing --------------------------------------
    print("\n🔄 Test 3: Batch Processing")
    print(divider)

    questions = [
        "Name one benefit of peer review.",
        "Name one challenge in academic publishing.",
        "Name one trend in AI research."
    ]

    answers = wrapper.batch_generate(questions, max_tokens=50)
    for idx, answer in enumerate(answers, 1):
        print(f"{idx}. {answer}")

    # --- Usage statistics ----------------------------------------------
    print("\n📈 Usage Statistics")
    print(divider)
    print(json.dumps(wrapper.get_stats(), indent=2))

    print("\n✅ Demo complete!")
366
+
367
+
368
# Run the interactive demo only when executed directly as a script.
if __name__ == "__main__":
    demo_llm()
tools/pdf_reader.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tools/pdf_reader.py
3
+ Extract text and metadata from research papers (PDF)
4
+ """
5
+
6
+ from pypdf import PdfReader
7
+ from typing import Dict, Any, Optional, List
8
+ import re
9
+ import os
10
+
11
class PDFReader:
    """
    PDF extraction tool for research papers.

    Features:
    - Extract full text
    - Extract metadata (title, author, etc.)
    - Identify abstract
    - Extract sections
    - Handle multi-column layouts

    All heavy lifting is delegated to pypdf's PdfReader; the methods here
    layer regex heuristics on top of its extracted text.
    """

    def __init__(self):
        # Currently informational only: extension checks are done inline
        # in extract_text()/validate_pdf().
        self.supported_extensions = ['.pdf']
        print("✅ PDF Reader initialized")

    def extract_text(self, pdf_path: str, max_pages: Optional[int] = None) -> str:
        """
        Extract all text from a PDF.

        Args:
            pdf_path: Path to PDF file.
            max_pages: Maximum pages to extract (None = all).

        Returns:
            Extracted text as one string, pages joined by blank lines.

        Raises:
            FileNotFoundError: If pdf_path does not exist.
            ValueError: If the path does not end in '.pdf'.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF not found: {pdf_path}")

        if not pdf_path.lower().endswith('.pdf'):
            raise ValueError(f"Not a PDF file: {pdf_path}")

        print(f"📖 Reading PDF: {pdf_path}")

        try:
            reader = PdfReader(pdf_path)
            num_pages = len(reader.pages)

            print(f" Pages: {num_pages}")

            # Extract text from pages
            text_parts = []
            pages_to_read = min(num_pages, max_pages) if max_pages else num_pages

            for i in range(pages_to_read):
                page = reader.pages[i]
                page_text = page.extract_text()
                text_parts.append(page_text)

                # Progress heartbeat every 10 pages
                if (i + 1) % 10 == 0:
                    print(f" Processed {i + 1}/{pages_to_read} pages...")

            full_text = '\n\n'.join(text_parts)

            print(f"✅ Extracted {len(full_text)} characters from {pages_to_read} pages")

            return full_text

        except Exception as e:
            print(f"❌ PDF extraction error: {e}")
            raise

    def get_paper_info(self, pdf_path: str) -> Dict[str, Any]:
        """
        Extract metadata and basic info from a PDF.

        Never raises: on failure returns a dict with empty fields plus an
        'error' key describing the problem.

        Returns:
            {
                'metadata': {...},     # title/author/etc. from PDF metadata
                'num_pages': int,
                'abstract': str,       # '' when not detected
                'sections': [...],     # heuristically detected headings
                'file_path': str,      # (absent in the error case)
                'file_size': int       # bytes (absent in the error case)
            }
        """
        print(f"📊 Extracting paper info from: {pdf_path}")

        try:
            reader = PdfReader(pdf_path)

            # Get metadata (pypdf exposes it as /Key entries; may be None)
            metadata = {}
            if reader.metadata:
                metadata = {
                    'title': reader.metadata.get('/Title', ''),
                    'author': reader.metadata.get('/Author', ''),
                    'subject': reader.metadata.get('/Subject', ''),
                    'creator': reader.metadata.get('/Creator', ''),
                    'producer': reader.metadata.get('/Producer', ''),
                    'creation_date': str(reader.metadata.get('/CreationDate', '')),
                }

            # Get number of pages
            num_pages = len(reader.pages)

            # Extract first few pages for abstract detection
            first_pages_text = ''
            for i in range(min(3, num_pages)):  # Check first 3 pages
                first_pages_text += reader.pages[i].extract_text() + '\n\n'

            # Try to extract abstract
            abstract = self._extract_abstract(first_pages_text)

            # Try to identify sections (only scans the same first pages,
            # so later headings will be missed)
            sections = self._extract_sections(first_pages_text)

            info = {
                'metadata': metadata,
                'num_pages': num_pages,
                'abstract': abstract,
                'sections': sections,
                'file_path': pdf_path,
                'file_size': os.path.getsize(pdf_path)
            }

            print(f"✅ Paper info extracted:")
            print(f" Title: {metadata.get('title', 'Not found')[:50]}...")
            print(f" Pages: {num_pages}")
            print(f" Abstract: {'Found' if abstract else 'Not found'}")

            return info

        except Exception as e:
            print(f"❌ Error extracting paper info: {e}")
            return {
                'metadata': {},
                'num_pages': 0,
                'abstract': '',
                'sections': [],
                'error': str(e)
            }

    def _extract_abstract(self, text: str) -> str:
        """Try to extract the abstract from paper text.

        Returns '' when no match of substantial length (> 50 chars) is
        found. The result is whitespace-normalized and capped at 1000
        characters.
        """
        # Look for "Abstract" section
        # Common patterns:
        # - "Abstract\n"
        # - "ABSTRACT\n"
        # - "Abstract—"
        # - "Abstract:"

        patterns = [
            r'(?i)abstract[:\-—]\s*(.*?)(?=\n\s*\n|\n\s*1\.|\n\s*introduction|$)',
            r'(?i)abstract\s*\n\s*(.*?)(?=\n\s*\n|\n\s*1\.|\n\s*introduction|$)',
        ]

        for pattern in patterns:
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if match:
                abstract = match.group(1).strip()

                # Clean up abstract
                abstract = re.sub(r'\s+', ' ', abstract)  # Remove extra whitespace
                abstract = abstract[:1000]  # Limit length

                if len(abstract) > 50:  # Must be substantial
                    return abstract

        return ''

    def _extract_sections(self, text: str) -> List[str]:
        """Try to identify paper section headings.

        Matches a fixed vocabulary of common section names (optionally
        numbered) at the start of a line; returns title-cased, de-duplicated
        names in order of first appearance.
        """
        # Common section patterns
        section_patterns = [
            r'(?i)^\s*\d+\.?\s+(introduction|background|related work|methodology|method|approach|experiments?|results?|evaluation|discussion|conclusion|references?)',
            r'(?i)^\s*(introduction|background|related work|methodology|method|approach|experiments?|results?|evaluation|discussion|conclusion)\s*\n'
        ]

        sections = []

        for pattern in section_patterns:
            matches = re.finditer(pattern, text, re.MULTILINE)
            for match in matches:
                section_name = match.group(1).strip()
                # Case-insensitive de-duplication
                if section_name.lower() not in [s.lower() for s in sections]:
                    sections.append(section_name.title())

        return sections

    def extract_page_range(
        self,
        pdf_path: str,
        start_page: int,
        end_page: int
    ) -> str:
        """Extract text from a specific 0-indexed, inclusive page range.

        Out-of-bounds indices are clamped to the document. Returns '' on
        any read error instead of raising.
        """
        try:
            reader = PdfReader(pdf_path)
            num_pages = len(reader.pages)

            # Validate range (clamp into [0, num_pages - 1], keep start <= end)
            start_page = max(0, min(start_page, num_pages - 1))
            end_page = max(start_page, min(end_page, num_pages - 1))

            text_parts = []
            for i in range(start_page, end_page + 1):
                text_parts.append(reader.pages[i].extract_text())

            return '\n\n'.join(text_parts)

        except Exception as e:
            print(f"❌ Error extracting page range: {e}")
            return ''

    def search_text(self, pdf_path: str, search_term: str) -> List[Dict[str, Any]]:
        """
        Case-insensitive literal search for text in a PDF.

        Returns a list of matches, each with the 1-based page number, a
        ~100-char context window, and the character offset within that
        page's extracted text. Returns [] on any read error.
        """
        print(f"🔍 Searching for '{search_term}' in {pdf_path}")

        try:
            reader = PdfReader(pdf_path)
            matches = []

            for page_num, page in enumerate(reader.pages):
                text = page.extract_text()

                # Find all occurrences (re.escape -> literal match, no regex metachars)
                pattern = re.compile(re.escape(search_term), re.IGNORECASE)

                for match in pattern.finditer(text):
                    # 50 chars of context on each side, clamped to the page text
                    start = max(0, match.start() - 50)
                    end = min(len(text), match.end() + 50)
                    context = text[start:end]

                    matches.append({
                        'page': page_num + 1,
                        'context': context,
                        'position': match.start()
                    })

            print(f"✅ Found {len(matches)} matches")
            return matches

        except Exception as e:
            print(f"❌ Search error: {e}")
            return []

    def extract_references(self, pdf_path: str) -> List[str]:
        """Try to extract the references/bibliography section.

        Scans the last 5 pages for a 'References'/'Bibliography' heading,
        then splits entries on numbered-line boundaries. Returns at most
        the first 50 references, or [] when nothing is found or on error.
        """
        print(f"📚 Extracting references from {pdf_path}")

        try:
            reader = PdfReader(pdf_path)
            num_pages = len(reader.pages)

            # References usually in last few pages
            last_pages_text = ''
            start_page = max(0, num_pages - 5)

            for i in range(start_page, num_pages):
                last_pages_text += reader.pages[i].extract_text() + '\n\n'

            # Look for references section (stop at an appendix or end of text)
            ref_pattern = r'(?i)(references?|bibliography)\s*\n\s*(.*?)(?=\n\s*appendix|\Z)'
            match = re.search(ref_pattern, last_pages_text, re.DOTALL)

            if match:
                ref_text = match.group(2)

                # Split into individual references
                # Common patterns: [1], (1), 1., numbered lines
                ref_lines = ref_text.split('\n')
                references = []
                current_ref = ''

                for line in ref_lines:
                    line = line.strip()

                    # Check if new reference (starts with number)
                    if re.match(r'^\[?\d+\]?\.?\s+', line):
                        if current_ref:
                            references.append(current_ref.strip())
                        current_ref = line
                    else:
                        # Continuation of the previous (wrapped) reference
                        current_ref += ' ' + line

                # Flush the final accumulated reference
                if current_ref:
                    references.append(current_ref.strip())

                print(f"✅ Extracted {len(references)} references")
                return references[:50]  # Limit to first 50

            return []

        except Exception as e:
            print(f"❌ Error extracting references: {e}")
            return []

    def get_text_stats(self, pdf_path: str) -> Dict[str, Any]:
        """Get statistics about the PDF text.

        NOTE: re-extracts the entire document, so this is as expensive as
        a full extract_text() call. Returns {'error': ...} on failure.
        """
        try:
            text = self.extract_text(pdf_path)

            stats = {
                'total_characters': len(text),
                'total_words': len(text.split()),
                'total_lines': len(text.split('\n')),
                'estimated_tokens': len(text) // 4,  # Rough estimate: 1 token ≈ 4 chars
                # max(..., 1) guards the division for empty documents
                'avg_word_length': sum(len(word) for word in text.split()) / max(len(text.split()), 1)
            }

            return stats

        except Exception as e:
            return {'error': str(e)}

    def validate_pdf(self, pdf_path: str) -> Dict[str, Any]:
        """Validate that a PDF is readable and report basic info.

        Returns a dict of boolean checks plus an 'errors' list; 'valid' is
        True only when the file exists, is a .pdf, opens cleanly, and page
        one yields more than 50 characters of extractable text (a heuristic
        to catch scanned-image PDFs).
        """
        validation = {
            'valid': False,
            'exists': False,
            'is_pdf': False,
            'readable': False,
            'num_pages': 0,
            'has_text': False,
            'errors': []
        }

        # Check existence
        if not os.path.exists(pdf_path):
            validation['errors'].append('File does not exist')
            return validation

        validation['exists'] = True

        # Check extension
        if not pdf_path.lower().endswith('.pdf'):
            validation['errors'].append('Not a PDF file')
            return validation

        validation['is_pdf'] = True

        # Try to read
        try:
            reader = PdfReader(pdf_path)
            validation['readable'] = True
            validation['num_pages'] = len(reader.pages)

            # Check if has extractable text (sample only the first page)
            if validation['num_pages'] > 0:
                sample_text = reader.pages[0].extract_text()
                if len(sample_text.strip()) > 50:
                    validation['has_text'] = True
                    validation['valid'] = True
                else:
                    validation['errors'].append('PDF has no extractable text (may be scanned image)')
            else:
                validation['errors'].append('PDF has no pages')

        except Exception as e:
            validation['errors'].append(f'Read error: {str(e)}')

        return validation
378
+
379
+
380
+
381
+ # ==================== HELPER FUNCTIONS ====================
382
+
383
def clean_text(text: str) -> str:
    """Clean extracted PDF text.

    Collapses runs of horizontal whitespace, strips standalone page
    numbers, and drops very short lines that are likely running
    headers/footers.

    Args:
        text: Raw text as returned by PDF extraction.

    Returns:
        Cleaned text, one line per retained source line.
    """
    # Collapse runs of spaces/tabs only — NOT newlines. The previous
    # version collapsed all whitespace (r'\s+'), which destroyed the line
    # structure and made both newline-based cleanup steps below dead code.
    text = re.sub(r'[ \t]+', ' ', text)

    # Remove page numbers (lines containing only digits)
    text = re.sub(r'\n\s*\d+\s*\n', '\n', text)

    # Remove headers/footers (heuristic: very short lines are likely
    # running headers, footers, or stray fragments)
    cleaned_lines = [line for line in text.split('\n') if len(line.strip()) > 20]

    return '\n'.join(cleaned_lines)
402
+
403
+
404
def extract_tables(text: str) -> List[str]:
    """Try to identify table-like structures in text.

    A line "looks tabular" when it contains 3+ tabs or 3+ runs of 3+
    consecutive spaces; 3 or more consecutive tabular lines are treated
    as one table.

    Args:
        text: Extracted (plain) text to scan.

    Returns:
        List of candidate tables, each as the original lines joined
        with newlines.
    """
    tables = []
    table_lines = []

    for line in text.split('\n'):
        # Heuristic: if line has 3+ tabs or multiple sequences of spaces
        if line.count('\t') >= 3 or len(re.findall(r'\s{3,}', line)) >= 3:
            table_lines.append(line)
        elif table_lines:
            # End of a table run: keep it only if it is substantial
            if len(table_lines) >= 3:
                tables.append('\n'.join(table_lines))
            table_lines = []

    # Flush a table that runs to the very end of the text — the previous
    # version only flushed on a following non-tabular line, silently
    # dropping trailing tables.
    if len(table_lines) >= 3:
        tables.append('\n'.join(table_lines))

    return tables
424
+
425
+ # ==================== DEMO ====================
426
+
427
def demo_pdf_reader():
    """Interactive demo of PDFReader.

    Prompts on stdin for a PDF path, validates it, prints metadata /
    abstract / sections, extracts sample text and statistics, and
    optionally runs a text search. Purely a manual smoke test — the
    exact prompt/print ordering is the observable behavior, so keep the
    statement order intact.
    """
    print("="*60)
    print("📄 PDF READER DEMO")
    print("="*60)
    print()

    reader = PDFReader()

    # Ask for PDF path
    print("Enter path to a PDF research paper to test:")
    pdf_path = input("Path: ").strip()

    if not pdf_path:
        print("⏭️ No path provided, exiting demo")
        return

    print()

    # Validate PDF up front so later steps can assume a readable file
    print("🔍 Validating PDF...")
    validation = reader.validate_pdf(pdf_path)
    print(f"Valid: {validation['valid']}")

    if not validation['valid']:
        print(f"❌ Errors: {validation['errors']}")
        return

    print()

    # Get paper info (metadata, abstract, sections)
    print("📊 Extracting paper info...")
    info = reader.get_paper_info(pdf_path)

    print(f"\nMetadata:")
    for key, value in info['metadata'].items():
        if value:  # skip empty metadata fields
            print(f" {key}: {value}")

    print(f"\nPages: {info['num_pages']}")
    print(f"File size: {info['file_size']:,} bytes")

    if info['abstract']:
        print(f"\nAbstract (first 200 chars):")
        print(f" {info['abstract'][:200]}...")

    if info['sections']:
        print(f"\nSections found: {', '.join(info['sections'])}")

    print()

    # Extract text (capped at 5 pages to keep the demo quick)
    print("📖 Extracting full text (first 5 pages)...")
    text = reader.extract_text(pdf_path, max_pages=5)

    print(f"\nExtracted text (first 500 chars):")
    print(f" {text[:500]}...")

    # Get stats (note: re-extracts the whole document internally)
    print("\n📈 Text statistics:")
    stats = reader.get_text_stats(pdf_path)
    for key, value in stats.items():
        print(f" {key}: {value}")

    # Search test (optional; empty input skips it)
    print("\n🔍 Search test:")
    search_term = input("Enter term to search (or Enter to skip): ").strip()

    if search_term:
        matches = reader.search_text(pdf_path, search_term)
        print(f"\nFound {len(matches)} matches:")
        for i, match in enumerate(matches[:3], 1):  # show at most 3 matches
            print(f"\n {i}. Page {match['page']}:")
            print(f" ...{match['context']}...")

    print("\n✅ Demo complete!")
506
+
507
+
508
# Run the interactive demo only when executed directly as a script.
if __name__ == "__main__":
    # BUG FIX: the original line was `demo_pdf_reader` (no parentheses),
    # which merely evaluates the function object and discards it — the
    # demo never ran. Add the call.
    demo_pdf_reader()