Spaces:

Yusufarsh
/

ReproAgent

Runtime error

App Files Files Community

Yusufarsh committed on Apr 26

Commit

c8d0576

verified ·

1 Parent(s): 358b88c

Upload 13 files

Browse files

Files changed (13) hide show

.gradio/certificate.pem +31 -0
agents/__init__.py +15 -0
agents/__pycache__/__init__.cpython-312.pyc +0 -0
agents/__pycache__/debugger.cpython-312.pyc +0 -0
agents/__pycache__/paper_parser.cpython-312.pyc +0 -0
agents/__pycache__/reasoning_agent.cpython-312.pyc +0 -0
agents/__pycache__/repo_analyzer.cpython-312.pyc +0 -0
agents/debugger.py +284 -0
agents/paper_parser.py +319 -0
agents/reasoning_agent.py +508 -0
agents/repo_analyzer.py +338 -0
assets/loss_plot.png +0 -0
assets/reward_plot.png +0 -0

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

agents/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+"""
+Agent implementations for ReproAgent.
+"""
+from agents.paper_parser import PaperParser
+from agents.repo_analyzer import RepoAnalyzer
+from agents.debugger import Debugger
+from agents.reasoning_agent import ReasoningAgent
+__all__ = [
+    'PaperParser',
+    'RepoAnalyzer',
+    'Debugger',
+    'ReasoningAgent'
+]

agents/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (482 Bytes). View file

agents/__pycache__/debugger.cpython-312.pyc ADDED Viewed

Binary file (9.31 kB). View file

agents/__pycache__/paper_parser.cpython-312.pyc ADDED Viewed

Binary file (11.9 kB). View file

agents/__pycache__/reasoning_agent.cpython-312.pyc ADDED Viewed

Binary file (24.6 kB). View file

agents/__pycache__/repo_analyzer.cpython-312.pyc ADDED Viewed

Binary file (12.9 kB). View file

agents/debugger.py ADDED Viewed

	@@ -0,0 +1,284 @@

+"""
+Debugging agent - analyzes and fixes code errors.
+"""
+import re
+from typing import Dict, Any, List, Optional, Tuple
+from reproagent.models import LLMClient
+class Debugger:
+    """
+    Debugging agent that:
+    1. Analyzes error messages
+    2. Searches for solutions
+    3. Proposes fixes
+    4. Applies patches
+    """
+    def __init__(self, llm_client: LLMClient):
+        """
+        Args:
+            llm_client: LLM for error analysis
+        """
+        self.llm = llm_client
+        # Common error patterns
+        self.error_patterns = {
+            'ImportError': r'ImportError: No module named [\'"](.+)[\'"]',
+            'ModuleNotFoundError': r'ModuleNotFoundError: No module named [\'"](.+)[\'"]',
+            'FileNotFoundError': r'FileNotFoundError: \[Errno 2\] No such file or directory: [\'"](.+)[\'"]',
+            'RuntimeError': r'RuntimeError: (.+)',
+            'ValueError': r'ValueError: (.+)',
+            'TypeError': r'TypeError: (.+)',
+            'AttributeError': r'AttributeError: (.+)',
+        }
+    def analyze_error(self, error_message: str, code_context: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Analyze error and determine cause.
+        Args:
+            error_message: Full error message/traceback
+            code_context: Relevant code snippet (optional)
+        Returns:
+            Analysis dict with error type, cause, and suggested fixes
+        """
+        print(f"🔍 Analyzing error...")
+        # Classify error type
+        error_type = self._classify_error(error_message)
+        # Extract error details
+        error_details = self._extract_error_details(error_message, error_type)
+        # Get LLM analysis
+        llm_analysis = self._llm_analyze_error(error_message, code_context)
+        analysis = {
+            'error_type': error_type,
+            'error_details': error_details,
+            'root_cause': llm_analysis.get('root_cause', 'Unknown'),
+            'suggested_fixes': llm_analysis.get('fixes', []),
+            'confidence': llm_analysis.get('confidence', 0.5)
+        }
+        print(f"✅ Error analyzed: {error_type}")
+        print(f"   Cause: {analysis['root_cause']}")
+        return analysis
+    def _classify_error(self, error_message: str) -> str:
+        """Classify error type."""
+        for error_type, pattern in self.error_patterns.items():
+            if re.search(pattern, error_message):
+                return error_type
+        # Check for common error types in message
+        if 'import' in error_message.lower():
+            return 'ImportError'
+        elif 'file' in error_message.lower() and 'not found' in error_message.lower():
+            return 'FileNotFoundError'
+        elif 'cuda' in error_message.lower() or 'gpu' in error_message.lower():
+            return 'CUDAError'
+        elif 'memory' in error_message.lower():
+            return 'MemoryError'
+        return 'UnknownError'
+    def _extract_error_details(self, error_message: str, error_type: str) -> Dict[str, str]:
+        """Extract specific details from error."""
+        details = {}
+        if error_type in self.error_patterns:
+            pattern = self.error_patterns[error_type]
+            match = re.search(pattern, error_message)
+            if match:
+                details['detail'] = match.group(1)
+        # Extract file and line number
+        file_pattern = r'File "(.+)", line (\d+)'
+        file_match = re.search(file_pattern, error_message)
+        if file_match:
+            details['file'] = file_match.group(1)
+            details['line'] = file_match.group(2)
+        return details
+    def _llm_analyze_error(self, error_message: str, code_context: Optional[str]) -> Dict[str, Any]:
+        """Use LLM to analyze error."""
+        prompt = f"""
+Analyze this Python error and provide solutions.
+Error:
+{error_message[:1000]}
+"""
+        if code_context:
+            prompt += f"\n\nRelevant code:\n{code_context[:500]}"
+        prompt += """
+Respond with JSON:
+{
+    "root_cause": "explanation of what caused the error",
+    "fixes": ["fix 1", "fix 2", "fix 3"],
+    "confidence": 0.9
+}
+"""
+        try:
+            result = self.llm.generate_structured(prompt)
+            return result
+        except:
+            return self._fallback_analysis(error_message)
+    def _fallback_analysis(self, error_message: str) -> Dict[str, Any]:
+        """Fallback analysis without LLM."""
+        # Common fixes for common errors
+        fixes = []
+        if 'ModuleNotFoundError' in error_message or 'ImportError' in error_message:
+            match = re.search(r"module named ['\"](.+)['\"]", error_message)
+            if match:
+                module = match.group(1)
+                fixes = [
+                    f"Install missing package: pip install {module}",
+                    f"Check if {module} is in requirements.txt",
+                    "Activate correct virtual environment"
+                ]
+        elif 'FileNotFoundError' in error_message:
+            fixes = [
+                "Check if file path is correct",
+                "Ensure data is downloaded",
+                "Check working directory"
+            ]
+        elif 'CUDA' in error_message or 'GPU' in error_message:
+            fixes = [
+                "Check CUDA installation",
+                "Verify GPU availability",
+                "Try running on CPU: device='cpu'"
+            ]
+        elif 'memory' in error_message.lower():
+            fixes = [
+                "Reduce batch size",
+                "Use gradient accumulation",
+                "Clear GPU cache: torch.cuda.empty_cache()"
+            ]
+        return {
+            'root_cause': 'Error detected',
+            'fixes': fixes or ['Debug manually'],
+            'confidence': 0.6
+        }
+    def generate_fix(self, error_analysis: Dict[str, Any]) -> str:
+        """
+        Generate code fix based on error analysis.
+        Args:
+            error_analysis: Output from analyze_error()
+        Returns:
+            Fix as code or command
+        """
+        error_type = error_analysis['error_type']
+        details = error_analysis['error_details']
+        # Generate specific fix based on error type
+        if error_type in ['ImportError', 'ModuleNotFoundError']:
+            module = details.get('detail', '')
+            return f"pip install {module}"
+        elif error_type == 'FileNotFoundError':
+            file_path = details.get('detail', '')
+            return f"# Check if {file_path} exists or download it"
+        elif error_type == 'CUDAError':
+            return "# Try: model.to('cpu') or install CUDA"
+        elif error_type == 'MemoryError':
+            return "# Reduce batch_size or use gradient accumulation"
+        # Use LLM for complex fixes
+        return self._llm_generate_fix(error_analysis)
+    def _llm_generate_fix(self, error_analysis: Dict[str, Any]) -> str:
+        """Use LLM to generate code fix."""
+        prompt = f"""
+Generate a code fix for this error:
+Error Type: {error_analysis['error_type']}
+Root Cause: {error_analysis['root_cause']}
+Provide the fix as Python code or shell command.
+"""
+        try:
+            fix = self.llm.generate(prompt, max_tokens=200)
+            return fix.strip()
+        except:
+            return "# Manual fix required"
+    def search_solution(self, error_message: str) -> List[str]:
+        """
+        Search for solutions to error.
+        Simulates searching StackOverflow, documentation, etc.
+        Args:
+            error_message: Error message
+        Returns:
+            List of solution suggestions
+        """
+        # In full implementation, would search:
+        # - StackOverflow API
+        # - GitHub Issues
+        # - Documentation
+        # For now, use LLM to generate solutions
+        prompt = f"""
+This error occurred: {error_message[:500]}
+List 3 common solutions to this error.
+Respond with JSON:
+{{
+    "solutions": ["solution 1", "solution 2", "solution 3"]
+}}
+"""
+        try:
+            result = self.llm.generate_structured(prompt)
+            return result.get('solutions', [])
+        except:
+            return ["Check dependencies", "Review code", "Search documentation"]
+# Test
+if __name__ == "__main__":
+    from reproagent.models import LLMClient
+    llm = LLMClient()
+    debugger = Debugger(llm)
+    # Test error
+    error = """
+Traceback (most recent call last):
+  File "train.py", line 10, in <module>
+    import torch
+ModuleNotFoundError: No module named 'torch'
+"""
+    analysis = debugger.analyze_error(error)
+    print(analysis)
+    fix = debugger.generate_fix(analysis)
+    print(f"\nFix: {fix}")

agents/paper_parser.py ADDED Viewed

	@@ -0,0 +1,319 @@

+"""
+Paper parsing agent - extracts structured information from PDFs.
+"""
+import re
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+from reproagent.models import LLMClient
+from reproagent.state import PaperState
+class PaperParser:
+    """
+    Parses research papers and extracts key information.
+    Uses LLM to extract structured data from paper text.
+    """
+    def __init__(self, llm_client: LLMClient):
+        """
+        Args:
+            llm_client: LLM client for extraction
+        """
+        self.llm = llm_client
+    def parse_paper(self, pdf_path: str) -> PaperState:
+        """
+        Parse paper and extract structured information.
+        Args:
+            pdf_path: Path to PDF file
+        Returns:
+            PaperState with extracted info
+        """
+        print(f"📄 Parsing paper: {pdf_path}")
+        # Extract text from PDF
+        text = self._extract_text(pdf_path)
+        if not text or text.startswith("Error"):
+            print(f"❌ Failed to extract text from PDF")
+            return PaperState(pdf_path=pdf_path)
+        print(f"✅ Extracted {len(text)} characters")
+        # Extract structured info with LLM
+        extracted = self._extract_with_llm(text)
+        # Build PaperState
+        state = PaperState(
+            pdf_path=pdf_path,
+            title=extracted.get('title', ''),
+            abstract=extracted.get('abstract', ''),
+            dataset=extracted.get('dataset', ''),
+            model=extracted.get('model', ''),
+            target_metric=float(extracted.get('target_metric', 0.0)),
+            metric_name=extracted.get('metric_name', 'accuracy'),
+            github_links=extracted.get('github_links', []),
+            key_claims=extracted.get('key_claims', []),
+            parsed=True,
+            confidence=extracted.get('confidence', 0.8)
+        )
+        print(f"✅ Paper parsed: {state.title}")
+        print(f"   Dataset: {state.dataset}")
+        print(f"   Model: {state.model}")
+        print(f"   Target: {state.target_metric} {state.metric_name}")
+        return state
+    def _extract_text(self, pdf_path: str) -> str:
+        """
+        Extract text from PDF.
+        Tries multiple methods.
+        """
+        try:
+            # Try PyPDF2 first (faster)
+            import PyPDF2
+            with open(pdf_path, 'rb') as file:
+                reader = PyPDF2.PdfReader(file)
+                text = ""
+                # Extract first 10 pages
+                for page in reader.pages[:10]:
+                    text += page.extract_text() + "\n"
+                return text
+        except Exception as e:
+            print(f"⚠️  PyPDF2 failed: {e}")
+            try:
+                # Try pdfplumber (more accurate)
+                import pdfplumber
+                text = ""
+                with pdfplumber.open(pdf_path) as pdf:
+                    for page in pdf.pages[:10]:
+                        text += page.extract_text() + "\n"
+                return text
+            except Exception as e2:
+                print(f"⚠️  pdfplumber failed: {e2}")
+                return f"Error: Could not extract text from PDF"
+    def _extract_with_llm(self, text: str) -> Dict[str, Any]:
+        """
+        Use LLM to extract structured information.
+        Args:
+            text: Paper text
+        Returns:
+            Extracted information dict
+        """
+        # Truncate text to fit in context
+        text_sample = text[:5000]
+        prompt = f"""
+Extract the following information from this research paper:
+1. **Title**: Full paper title
+2. **Abstract**: Paper abstract (if present)
+3. **Dataset**: Dataset used (e.g., "CIFAR-10", "ImageNet", "COCO")
+4. **Model**: Model architecture (e.g., "ResNet-50", "BERT", "GPT-2")
+5. **Target Metric**: Best reported performance value as a number. Extract exactly what is in the text.
+6. **Metric Name**: Type of metric (e.g., "FID", "accuracy", "CLIP score", "BLEU"). DO NOT default to accuracy!
+7. **GitHub Links**: Any GitHub URLs mentioned (full URLs)
+8. **Key Claims**: Main performance claims (list)
+Paper excerpt:
+{text_sample}
+Respond with ONLY valid JSON in this exact format:
+{{
+    "title": "paper title here",
+    "abstract": "abstract text here",
+    "dataset": "dataset name",
+    "model": "model name",
+    "target_metric": 12.34,
+    "metric_name": "FID",
+    "github_links": ["https://github.com/user/repo"],
+    "key_claims": ["claim 1", "claim 2"],
+    "confidence": 0.9
+}}
+"""
+        try:
+            result = self.llm.generate_structured(prompt)
+            # Validate and clean result
+            if 'error' not in result:
+                # Ensure github_links is a list
+                if 'github_links' in result and isinstance(result['github_links'], str):
+                    result['github_links'] = [result['github_links']]
+                # Extract GitHub links from text if none found
+                if not result.get('github_links'):
+                    result['github_links'] = self._extract_github_links(text)
+                return result
+            else:
+                print(f"⚠️  LLM extraction failed: {result.get('error')}")
+        except Exception as e:
+            print(f"⚠️  LLM error: {e}")
+        # Fallback: regex extraction
+        return self._fallback_extraction(text)
+    def _extract_github_links(self, text: str) -> List[str]:
+        """Extract GitHub URLs using regex."""
+        pattern = r'https?://github\.com/[\w\-]+/[\w\-]+'
+        matches = re.findall(pattern, text)
+        return list(set(matches))  # unique links
+    def _fallback_extraction(self, text: str) -> Dict[str, Any]:
+        """
+        Fallback extraction using simple heuristics.
+        Used when LLM fails.
+        """
+        print("⚠️  Using fallback extraction")
+        # Extract title (usually first line or after "Title:")
+        title = ""
+        lines = text.split('\n')
+        for line in lines[:20]:
+            if line.strip() and len(line.strip()) > 10:
+                title = line.strip()
+                break
+        # Extract dataset mentions
+        dataset = ""
+        dataset_patterns = [
+            r'(CIFAR-10|CIFAR-100|ImageNet|COCO|MNIST|Fashion-MNIST)',
+            r'(?:on|using|dataset)\s+(\w+)',
+        ]
+        for pattern in dataset_patterns:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                dataset = match.group(1)
+                break
+        # Extract model mentions
+        model = ""
+        model_patterns = [
+            r'(ResNet-\d+|VGG-\d+|BERT|GPT-\d+|Transformer)',
+            r'(AlexNet|DenseNet|MobileNet|EfficientNet)',
+        ]
+        for pattern in model_patterns:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                model = match.group(1)
+                break
+        # Extract metrics
+        metric_pattern = r'(\d+\.?\d*)\s*%?\s*(accuracy|precision|recall|F1|BLEU)'
+        metric_match = re.search(metric_pattern, text, re.IGNORECASE)
+        target_metric = 0.0
+        metric_name = "accuracy"
+        if metric_match:
+            target_metric = float(metric_match.group(1))
+            metric_name = metric_match.group(2).lower()
+            # Convert percentage to decimal
+            if target_metric > 1.0:
+                target_metric = target_metric / 100.0
+        # GitHub links
+        github_links = self._extract_github_links(text)
+        return {
+            'title': title or "Unknown Paper",
+            'abstract': "",
+            'dataset': dataset or "Unknown",
+            'model': model or "Unknown",
+            'target_metric': target_metric,
+            'metric_name': metric_name,
+            'github_links': github_links,
+            'key_claims': [],
+            'confidence': 0.5
+        }
+    def parse_from_arxiv(self, arxiv_id: str) -> PaperState:
+        """
+        Parse paper from ArXiv ID.
+        Args:
+            arxiv_id: ArXiv paper ID (e.g., "2103.00020")
+        Returns:
+            PaperState
+        """
+        print(f"📄 Fetching paper from ArXiv: {arxiv_id}")
+        try:
+            import requests
+            # Fetch ArXiv metadata
+            url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
+            response = requests.get(url)
+            if response.status_code == 200:
+                # Parse XML response
+                import xml.etree.ElementTree as ET
+                root = ET.fromstring(response.content)
+                # Extract metadata
+                entry = root.find('{http://www.w3.org/2005/Atom}entry')
+                if entry:
+                    title = entry.find('{http://www.w3.org/2005/Atom}title').text.strip()
+                    abstract = entry.find('{http://www.w3.org/2005/Atom}summary').text.strip()
+                    # Use LLM to extract technical details from abstract
+                    extracted = self._extract_with_llm(f"Title: {title}\n\nAbstract: {abstract}")
+                    return PaperState(
+                        pdf_path=f"arxiv:{arxiv_id}",
+                        title=title,
+                        abstract=abstract,
+                        dataset=extracted.get('dataset', ''),
+                        model=extracted.get('model', ''),
+                        target_metric=extracted.get('target_metric', 0.0),
+                        metric_name=extracted.get('metric_name', 'accuracy'),
+                        github_links=extracted.get('github_links', []),
+                        key_claims=extracted.get('key_claims', []),
+                        parsed=True,
+                        confidence=0.7
+                    )
+        except Exception as e:
+            print(f"❌ ArXiv fetch failed: {e}")
+        return PaperState(pdf_path=f"arxiv:{arxiv_id}")
+# Test
+if __name__ == "__main__":
+    from reproagent.models import LLMClient
+    llm = LLMClient()
+    parser = PaperParser(llm)
+    # Test with sample text
+    sample_text = """
+    Deep Residual Learning for Image Recognition
+    Abstract: We present a residual learning framework to ease the training of networks
+    that are substantially deeper than those used previously. We achieve 95.2% accuracy
+    on CIFAR-10 dataset using ResNet-50 architecture.
+    Code: https://github.com/example/resnet-cifar10
+    """
+    result = parser._extract_with_llm(sample_text)
+    print(result)

agents/reasoning_agent.py ADDED Viewed

	@@ -0,0 +1,508 @@

+"""
+Main reasoning agent - orchestrates the entire reproduction workflow.
+Uses hypothesis-driven approach to intelligently navigate the reproduction process.
+"""
+from typing import Dict, Any, Optional, Tuple, List
+import numpy as np
+from reproagent.environment import ReproAgentEnv
+from reproagent.state import ReproductionState, Phase
+from reproagent.actions import ActionSpace, ActionType, Action
+from reproagent.models import LLMClient
+from agents.paper_parser import PaperParser
+from agents.repo_analyzer import RepoAnalyzer
+from agents.debugger import Debugger
+class ReasoningAgent:
+    """
+    Main intelligent agent for paper reproduction.
+    Strategy:
+    1. Parse paper → understand what to reproduce
+    2. Find & analyze repo → understand how to reproduce
+    3. Setup environment → prepare for execution
+    4. Execute & debug → run code, fix errors
+    5. Experiment → tune hyperparameters
+    6. Compare → validate reproduction
+    """
+    def __init__(self, env: ReproAgentEnv, use_llm: bool = True):
+        """
+        Args:
+            env: ReproAgent environment
+            use_llm: Whether to use LLM for reasoning
+        """
+        self.env = env
+        self.action_space = ActionSpace()
+        self.use_llm = use_llm
+        # Initialize LLM and sub-agents
+        if use_llm:
+            try:
+                self.llm = LLMClient()
+            except:
+                print("⚠️  LLM not available, using rule-based mode")
+                self.llm = LLMClient(provider="mock")
+                self.use_llm = False
+        else:
+            self.llm = LLMClient(provider="mock")
+        self.paper_parser = PaperParser(self.llm)
+        self.repo_analyzer = RepoAnalyzer(self.llm)
+        self.debugger = Debugger(self.llm)
+        # Agent state
+        self.current_strategy = "systematic"  # systematic, debugging, experimenting
+        self.hypotheses = []
+        self.phase_progress = {
+            Phase.PARSING: False,
+            Phase.REPO_ANALYSIS: False,
+            Phase.SETUP: False,
+            Phase.EXECUTION: False,
+            Phase.DEBUGGING: False,
+            Phase.EXPERIMENTATION: False,
+        }
+    def select_action(
+        self,
+        observation: Dict[str, np.ndarray],
+        info: Dict[str, Any]
+    ) -> int:
+        """
+        Select next action based on current state.
+        Args:
+            observation: Environment observation
+            info: Additional info
+        Returns:
+            Action ID
+        """
+        # Get current state from environment
+        state = self.env.state
+        # Determine strategy based on phase
+        if state.meta.phase == Phase.IDLE or state.meta.phase == Phase.PARSING:
+            return self._parsing_phase_action(state)
+        elif state.meta.phase == Phase.REPO_ANALYSIS:
+            return self._repo_analysis_action(state)
+        elif state.meta.phase == Phase.SETUP:
+            return self._setup_phase_action(state)
+        elif state.meta.phase == Phase.EXECUTION:
+            return self._execution_phase_action(state)
+        elif state.meta.phase == Phase.DEBUGGING:
+            return self._debugging_phase_action(state)
+        elif state.meta.phase == Phase.EXPERIMENTATION:
+            return self._experimentation_action(state)
+        elif state.meta.phase == Phase.COMPARISON:
+            if not getattr(state.meta, 'report_generated', False):
+                return self.action_space.get_id_by_action(ActionType.GENERATE_REPORT)
+            else:
+                return self.action_space.get_id_by_action(ActionType.STOP_PROCESS)
+        else:
+            # Default: random exploration
+            return self.env.action_space.sample()
+    def _parsing_phase_action(self, state: ReproductionState) -> int:
+        """Actions for paper parsing phase."""
+        if not state.paper.parsed:
+            return self.action_space.get_id_by_action(ActionType.PARSE_PDF)
+        elif not state.paper.github_links:
+            return self.action_space.get_id_by_action(ActionType.EXTRACT_GITHUB)
+        else:
+            # Parsing is complete — move to repo cloning
+            if not state.repo.cloned:
+                return self.action_space.get_id_by_action(ActionType.CLONE_REPO)
+            else:
+                return self.action_space.get_id_by_action(ActionType.READ_README)
+    def _repo_analysis_action(self, state: ReproductionState) -> int:
+        """Actions for repository analysis phase."""
+        if not state.repo.cloned and state.paper.github_links:
+            return self.action_space.get_id_by_action(ActionType.CLONE_REPO)
+        elif state.repo.cloned and not state.repo.readme_content:
+            return self.action_space.get_id_by_action(ActionType.READ_README)
+        elif state.repo.readme_content and not state.repo.entry_point:
+            return self.action_space.get_id_by_action(ActionType.FIND_ENTRY_POINT)
+        elif state.repo.entry_point and not state.repo.dependencies:
+            return self.action_space.get_id_by_action(ActionType.EXTRACT_DEPS)
+        else:
+            # Repo fully analyzed — move to environment setup (CREATE_VENV first!)
+            return self.action_space.get_id_by_action(ActionType.CREATE_VENV)
+    def _setup_phase_action(self, state: ReproductionState) -> int:
+        """Actions for environment setup phase."""
+        if not state.environment.setup_complete:
+            if state.repo.dependencies:
+                return self.action_space.get_id_by_action(ActionType.INSTALL_REQUIREMENTS)
+            else:
+                # Even with no explicit deps listed, verify setup
+                return self.action_space.get_id_by_action(ActionType.VERIFY_SETUP)
+        else:
+            # Setup complete — move to execution
+            return self.action_space.get_id_by_action(ActionType.RUN_TRAINING)
+    def _execution_phase_action(self, state: ReproductionState) -> int:
+        """Actions for code execution phase."""
+        if state.execution.last_error:
+            # Transition to debugging
+            return self.action_space.get_id_by_action(ActionType.ANALYZE_ERROR)
+        elif state.experiment.current_metric > 0 and state.experiment.gap > 0.05:
+            # Has some results but gap is large — move to experimentation
+            return self.action_space.get_id_by_action(ActionType.RUN_EXPERIMENT)
+        elif state.experiment.current_metric > 0 and state.experiment.gap <= 0.05:
+            # Close enough — compare
+            return self.action_space.get_id_by_action(ActionType.COMPARE_RESULTS)
+        else:
+            # Run training
+            return self.action_space.get_id_by_action(ActionType.RUN_TRAINING)
+    def _debugging_phase_action(self, state: ReproductionState) -> int:
+        """Actions for debugging phase."""
+        total_debug_actions = len(state.debug.fix_attempts) + len(state.debug.solutions_tried)
+        # Cap: after 3 debug attempts, give up and compare what we have
+        if total_debug_actions >= 3:
+            state.debug.current_error = ""  # clear to break loop
+            return self.action_space.get_id_by_action(ActionType.COMPARE_RESULTS)
+        if state.debug.current_error and not state.debug.last_hypothesis:
+            return self.action_space.get_id_by_action(ActionType.ANALYZE_ERROR)
+        elif state.debug.last_hypothesis and len(state.debug.fix_attempts) == 0:
+            return self.action_space.get_id_by_action(ActionType.APPLY_FIX)
+        elif state.debug.current_error:
+            return self.action_space.get_id_by_action(ActionType.APPLY_FIX)
+        else:
+            # Error resolved — back to execution
+            return self.action_space.get_id_by_action(ActionType.RUN_TRAINING)
+    def _experimentation_action(self, state: ReproductionState) -> int:
+        """Actions for hyperparameter tuning phase."""
+        gap = state.experiment.gap
+        experiments_run = state.experiment.experiments_run
+        # Use LLM for intelligent hyperparameter selection if available
+        if self.use_llm and experiments_run > 0:
+            action = self._llm_suggest_hyperparameter_action(state)
+            if action is not None:
+                return action
+        # Rule-based: alternate between tuning a param and running an experiment
+        if experiments_run > 0 and experiments_run % 2 == 0:
+            # Every other step, run an experiment to measure progress
+            return self.action_space.get_id_by_action(ActionType.RUN_EXPERIMENT)
+        if gap > 0.3:
+            return self.action_space.get_id_by_action(ActionType.MODIFY_LR)
+        elif gap > 0.15:
+            if experiments_run % 4 < 2:
+                return self.action_space.get_id_by_action(ActionType.MODIFY_BATCH)
+            else:
+                return self.action_space.get_id_by_action(ActionType.MODIFY_OPTIMIZER)
+        elif gap > 0.05:
+            return self.action_space.get_id_by_action(ActionType.ADD_REGULARIZATION)
+        else:
+            # Very close — run experiment to lock in
+            return self.action_space.get_id_by_action(ActionType.RUN_EXPERIMENT)
+    def _llm_suggest_hyperparameter_action(self, state: ReproductionState) -> Optional[int]:
+        """Use LLM to suggest next hyperparameter action."""
+        prompt = f"""
+You are tuning hyperparameters to reproduce a paper's results.
+Current state:
+- Target metric: {state.paper.target_metric:.3f}
+- Current metric: {state.experiment.current_metric:.3f}
+- Gap: {state.experiment.gap:.3f}
+- Experiments run: {state.experiment.experiments_run}
+- Current config: {state.experiment.current_config}
+What should be adjusted next?
+Options:
+1. learning_rate
+2. batch_size
+3. optimizer
+4. epochs
+5. regularization
+6. run_experiment (test current config)
+Respond with JSON:
+{{
+    "action": "learning_rate",
+    "reasoning": "why this action"
+}}
+"""
+        try:
+            result = self.llm.generate_structured(prompt)
+            action_name = result.get('action', '')
+            action_map = {
+                'learning_rate': ActionType.MODIFY_LR,
+                'batch_size': ActionType.MODIFY_BATCH,
+                'optimizer': ActionType.MODIFY_OPTIMIZER,
+                'epochs': ActionType.MODIFY_EPOCHS,
+                'regularization': ActionType.ADD_REGULARIZATION,
+                'run_experiment': ActionType.RUN_EXPERIMENT
+            }
+            if action_name in action_map:
+                action_type = action_map[action_name]
+                return self.action_space.get_id_by_action(action_type)
+        except Exception as e:
+            print(f"⚠️  LLM suggestion failed: {e}")
+        return None
+    def form_hypothesis(self, state: ReproductionState) -> str:
+        """
+        Form hypothesis about what's preventing reproduction.
+        Args:
+            state: Current state
+        Returns:
+            Hypothesis string
+        """
+        if not state.paper.parsed:
+            return "Need to parse paper to understand target"
+        elif not state.repo.cloned:
+            return "Need to find and clone repository"
+        elif state.debug.current_error:
+            return f"Need to fix error: {state.debug.current_error[:50]}"
+        elif state.experiment.gap > 0.2:
+            return "Hyperparameters are significantly off from optimal"
+        elif state.experiment.gap > 0.05:
+            return "Need fine-tuning of hyperparameters"
+        else:
+            return "Close to target, validating reproduction"
+    def get_reasoning(self, state: ReproductionState, action_id: int) -> str:
+        """
+        Generate human-readable reasoning for action.
+        Args:
+            state: Current state
+            action_id: Selected action
+        Returns:
+            Reasoning string
+        """
+        action_type = self.action_space.get_action_by_id(action_id)
+        reasoning_map = {
+            ActionType.PARSE_PDF: f"📄 Parsing paper to extract methodology",
+            ActionType.EXTRACT_GITHUB: f"🔍 Looking for implementation repository",
+            ActionType.CLONE_REPO: f"📥 Cloning repository: {state.paper.github_links[0] if state.paper.github_links else 'unknown'}",
+            ActionType.READ_README: f"📖 Reading setup instructions",
+            ActionType.INSTALL_REQUIREMENTS: f"📦 Installing {len(state.repo.dependencies)} dependencies",
+            ActionType.RUN_TRAINING: f"🚀 Executing training script",
+            ActionType.ANALYZE_ERROR: f"🔍 Analyzing error: {state.debug.current_error[:30]}...",
+            ActionType.APPLY_FIX: f"🔧 Applying fix attempt #{len(state.debug.fix_attempts) + 1}",
+            ActionType.RUN_EXPERIMENT: f"🧪 Running experiment #{state.experiment.experiments_run + 1}",
+            ActionType.MODIFY_LR: f"⚙️  Adjusting learning rate (gap: {state.experiment.gap:.3f})",
+            ActionType.COMPARE_RESULTS: f"📊 Comparing results: {state.experiment.current_metric:.3f} vs {state.paper.target_metric:.3f}",
+        }
+        return reasoning_map.get(action_type, f"Executing {action_type.value}")
+    def reset(self):
+        """Reset agent for new episode."""
+        self.current_strategy = "systematic"
+        self.hypotheses = []
+        self.phase_progress = {phase: False for phase in Phase}
+    def get_stats(self) -> Dict[str, Any]:
+        """Get agent statistics."""
+        return {
+            'strategy': self.current_strategy,
+            'hypotheses_formed': len(self.hypotheses),
+            'phases_completed': sum(self.phase_progress.values())
+        }
+class RLAgent:
+    """
+    RL-trainable agent (for PPO/DPO training).
+    Uses neural network policy.
+    """
+    def __init__(self, env: ReproAgentEnv, policy_network=None):
+        """
+        Args:
+            env: Environment
+            policy_network: Pre-trained policy (optional)
+        """
+        self.env = env
+        self.policy = policy_network
+        if policy_network is None:
+            self._init_policy()
+    def _init_policy(self):
+        """Initialize policy network."""
+        try:
+            import torch
+            import torch.nn as nn
+            # Simple MLP policy
+            obs_dim = 25  # 5 feature vectors × 5 dims each
+            action_dim = self.env.action_space.n
+            self.policy = nn.Sequential(
+                nn.Linear(obs_dim, 128),
+                nn.ReLU(),
+                nn.Linear(128, 128),
+                nn.ReLU(),
+                nn.Linear(128, action_dim),
+                nn.Softmax(dim=-1)
+            )
+        except ImportError:
+            print("⚠️  PyTorch not installed, using random policy")
+            self.policy = None
+    def select_action(
+        self,
+        observation: Dict[str, np.ndarray],
+        info: Dict[str, Any]
+    ) -> int:
+        """Select action using policy network."""
+        if self.policy is None:
+            return self.env.action_space.sample()
+        try:
+            import torch
+            # Flatten observation
+            obs_vec = np.concatenate([
+                observation['paper_features'],
+                observation['repo_features'],
+                observation['execution_features'],
+                observation['experiment_features'],
+                observation['meta_features']
+            ])
+            obs_tensor = torch.FloatTensor(obs_vec).unsqueeze(0)
+            with torch.no_grad():
+                action_probs = self.policy(obs_tensor)
+            # Sample action
+            action = torch.multinomial(action_probs, 1).item()
+            return action
+        except:
+            return self.env.action_space.sample()
+    def reset(self):
+        """Reset agent."""
+        pass
+    def get_stats(self) -> Dict[str, Any]:
+        """Get stats."""
+        return {'type': 'RL'}
+# Factory function
+def create_agent(env: ReproAgentEnv, agent_type: str = "reasoning", **kwargs):
+    """
+    Factory function to create agents.
+    Args:
+        env: Environment
+        agent_type: 'reasoning', 'rl', or 'random'
+        **kwargs: Additional arguments
+    Returns:
+        Agent instance
+    """
+    if agent_type == "reasoning":
+        return ReasoningAgent(env, use_llm=kwargs.get('use_llm', True))
+    elif agent_type == "rl":
+        return RLAgent(env, policy_network=kwargs.get('policy', None))
+    elif agent_type == "random":
+        # Simple random agent for baseline
+        class RandomAgent:
+            def __init__(self, env):
+                self.env = env
+            def select_action(self, obs, info):
+                return self.env.action_space.sample()
+            def reset(self):
+                pass
+            def get_stats(self):
+                return {'type': 'random'}
+            def get_reasoning(self, state, action_id):
+                return f"Random action: {action_id}"
+        return RandomAgent(env)
+    else:
+        raise ValueError(f"Unknown agent type: {agent_type}")
+# Test
+if __name__ == "__main__":
+    from reproagent.environment import ReproAgentEnv
+    # Create environment
+    env = ReproAgentEnv(difficulty="easy", use_llm=False)
+    # Create agent
+    agent = create_agent(env, agent_type="reasoning", use_llm=False)
+    # Run episode
+    obs, info = env.reset()
+    for step in range(20):
+        action = agent.select_action(obs, info)
+        obs, reward, terminated, truncated, info = env.step(action)
+        print(f"Step {step + 1}: {info.get('action_type', 'unknown')} | Reward: {reward:.2f}")
+        if terminated or truncated:
+            break
+    print(f"\nFinal metric: {info.get('current_metric', 0.0):.3f}")

agents/repo_analyzer.py ADDED Viewed

	@@ -0,0 +1,338 @@

+"""
+Repository analyzer - analyzes GitHub repositories.
+"""
+import os
+import re
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+import subprocess
+from reproagent.models import LLMClient
+from reproagent.state import RepoState
+class RepoAnalyzer:
+    """
+    Analyzes GitHub repositories to understand:
+    - Code structure
+    - Dependencies
+    - Entry points
+    - Setup instructions
+    """
+    def __init__(self, llm_client: LLMClient):
+        """
+        Args:
+            llm_client: LLM for code analysis
+        """
+        self.llm = llm_client
+    def analyze_repo(self, repo_url: str, local_path: Optional[str] = None) -> RepoState:
+        """
+        Analyze a GitHub repository.
+        Args:
+            repo_url: GitHub URL
+            local_path: Local path (if already cloned)
+        Returns:
+            RepoState with analysis
+        """
+        print(f"📦 Analyzing repository: {repo_url}")
+        # Clone if needed
+        if not local_path:
+            local_path = self._clone_repo(repo_url)
+        if not local_path or not Path(local_path).exists():
+            print(f"❌ Failed to access repository")
+            return RepoState(url=repo_url)
+        # Analyze components
+        readme_content = self._read_readme(local_path)
+        dependencies = self._extract_dependencies(local_path)
+        entry_point = self._find_entry_point(local_path)
+        framework = self._detect_framework(local_path, dependencies)
+        setup_instructions = self._extract_setup_instructions(readme_content)
+        state = RepoState(
+            url=repo_url,
+            cloned=True,
+            local_path=local_path,
+            readme_content=readme_content,
+            setup_instructions=setup_instructions,
+            dependencies=dependencies,
+            entry_point=entry_point,
+            framework=framework,
+            repo_quality_score=self._calculate_quality_score(local_path, readme_content)
+        )
+        print(f"✅ Repository analyzed")
+        print(f"   Framework: {state.framework}")
+        print(f"   Entry point: {state.entry_point}")
+        print(f"   Dependencies: {len(state.dependencies)}")
+        return state
+    def _clone_repo(self, repo_url: str) -> Optional[str]:
+        """
+        Clone GitHub repository.
+        Args:
+            repo_url: GitHub URL
+        Returns:
+            Local path or None if failed
+        """
+        try:
+            # Create temp directory
+            import tempfile
+            temp_dir = tempfile.mkdtemp(prefix="reproagent_")
+            print(f"📥 Cloning to {temp_dir}...")
+            # Clone with git
+            result = subprocess.run(
+                ['git', 'clone', '--depth', '1', repo_url, temp_dir],
+                capture_output=True,
+                text=True,
+                timeout=60
+            )
+            if result.returncode == 0:
+                print(f"✅ Repository cloned")
+                return temp_dir
+            else:
+                print(f"❌ Clone failed: {result.stderr}")
+                return None
+        except Exception as e:
+            print(f"❌ Clone error: {e}")
+            return None
+    def _read_readme(self, repo_path: str) -> str:
+        """Read README file."""
+        readme_files = ['README.md', 'README.rst', 'README.txt', 'README']
+        for readme_name in readme_files:
+            readme_path = Path(repo_path) / readme_name
+            if readme_path.exists():
+                try:
+                    with open(readme_path, 'r', encoding='utf-8') as f:
+                        return f.read()
+                except Exception as e:
+                    print(f"⚠️  Error reading {readme_name}: {e}")
+        return ""
+    def _extract_dependencies(self, repo_path: str) -> List[str]:
+        """Extract dependencies from requirements files."""
+        dependencies = []
+        # Check requirements.txt
+        req_path = Path(repo_path) / 'requirements.txt'
+        if req_path.exists():
+            try:
+                with open(req_path, 'r') as f:
+                    for line in f:
+                        line = line.strip()
+                        if line and not line.startswith('#'):
+                            # Extract package name (before ==, >=, etc.)
+                            pkg = re.split(r'[=<>!]', line)[0].strip()
+                            dependencies.append(pkg)
+            except Exception as e:
+                print(f"⚠️  Error reading requirements.txt: {e}")
+        # Check setup.py
+        setup_path = Path(repo_path) / 'setup.py'
+        if setup_path.exists():
+            try:
+                with open(setup_path, 'r') as f:
+                    content = f.read()
+                    # Look for install_requires
+                    match = re.search(r'install_requires\s*=\s*\[(.*?)\]', content, re.DOTALL)
+                    if match:
+                        deps_str = match.group(1)
+                        for dep in re.findall(r'["\']([^"\']+)["\']', deps_str):
+                            pkg = re.split(r'[=<>!]', dep)[0].strip()
+                            if pkg not in dependencies:
+                                dependencies.append(pkg)
+            except Exception as e:
+                print(f"⚠️  Error reading setup.py: {e}")
+        # Check pyproject.toml
+        pyproject_path = Path(repo_path) / 'pyproject.toml'
+        if pyproject_path.exists():
+            try:
+                import tomli
+                with open(pyproject_path, 'rb') as f:
+                    data = tomli.load(f)
+                    deps = data.get('project', {}).get('dependencies', [])
+                    for dep in deps:
+                        pkg = re.split(r'[=<>!]', dep)[0].strip()
+                        if pkg not in dependencies:
+                            dependencies.append(pkg)
+            except:
+                pass
+        return dependencies
+    def _find_entry_point(self, repo_path: str) -> str:
+        """Find main entry point script."""
+        # Common entry point names
+        candidates = [
+            'train.py',
+            'main.py',
+            'run.py',
+            'train_model.py',
+            'finetune.py',
+            'run_training.py'
+        ]
+        repo_dir = Path(repo_path)
+        for candidate in candidates:
+            if (repo_dir / candidate).exists():
+                return candidate
+        # Search in subdirectories
+        for py_file in repo_dir.rglob('*.py'):
+            if py_file.stem in ['train', 'main', 'run']:
+                return str(py_file.relative_to(repo_dir))
+        return ""
+    def _detect_framework(self, repo_path: str, dependencies: List[str]) -> str:
+        """Detect ML framework used."""
+        dep_str = ' '.join(dependencies).lower()
+        if 'torch' in dep_str or 'pytorch' in dep_str:
+            return 'pytorch'
+        elif 'tensorflow' in dep_str or 'tf' in dep_str:
+            return 'tensorflow'
+        elif 'jax' in dep_str:
+            return 'jax'
+        elif 'keras' in dep_str:
+            return 'keras'
+        # Check imports in Python files
+        try:
+            for py_file in Path(repo_path).rglob('*.py'):
+                with open(py_file, 'r') as f:
+                    content = f.read(1000)  # First 1000 chars
+                    if 'import torch' in content:
+                        return 'pytorch'
+                    elif 'import tensorflow' in content:
+                        return 'tensorflow'
+        except:
+            pass
+        return "unknown"
+    def _extract_setup_instructions(self, readme_content: str) -> List[str]:
+        """
+        Extract setup instructions from README using LLM.
+        Args:
+            readme_content: README text
+        Returns:
+            List of setup steps
+        """
+        if not readme_content:
+            return []
+        # Truncate README
+        readme_sample = readme_content[:3000]
+        prompt = f"""
+Extract step-by-step setup/installation instructions from this README.
+README:
+{readme_sample}
+Respond with JSON:
+{{
+    "setup_steps": ["step 1", "step 2", ...]
+}}
+"""
+        try:
+            result = self.llm.generate_structured(prompt)
+            return result.get('setup_steps', [])
+        except:
+            # Fallback: simple extraction
+            return self._simple_setup_extraction(readme_content)
+    def _simple_setup_extraction(self, readme: str) -> List[str]:
+        """Simple regex-based setup extraction."""
+        steps = []
+        # Look for pip install commands
+        pip_pattern = r'pip install (.+)'
+        for match in re.finditer(pip_pattern, readme):
+            steps.append(f"pip install {match.group(1).strip()}")
+        # Look for numbered steps
+        step_pattern = r'^\d+\.\s+(.+)$'
+        for line in readme.split('\n'):
+            match = re.match(step_pattern, line.strip())
+            if match:
+                steps.append(match.group(1))
+        return steps[:10]  # Max 10 steps
+    def _calculate_quality_score(self, repo_path: str, readme: str) -> float:
+        """
+        Calculate repository quality score.
+        Factors:
+        - Has README
+        - Has requirements/setup files
+        - Has tests
+        - Code organization
+        """
+        score = 0.0
+        # Has README (0.3)
+        if readme:
+            score += 0.3
+        # Has requirements (0.2)
+        if (Path(repo_path) / 'requirements.txt').exists():
+            score += 0.2
+        # Has setup.py or pyproject.toml (0.2)
+        if (Path(repo_path) / 'setup.py').exists() or (Path(repo_path) / 'pyproject.toml').exists():
+            score += 0.2
+        # Has tests (0.15)
+        if (Path(repo_path) / 'tests').exists() or (Path(repo_path) / 'test').exists():
+            score += 0.15
+        # Has LICENSE (0.05)
+        if (Path(repo_path) / 'LICENSE').exists():
+            score += 0.05
+        # Has .gitignore (0.05)
+        if (Path(repo_path) / '.gitignore').exists():
+            score += 0.05
+        # Good README length (0.05)
+        if len(readme) > 500:
+            score += 0.05
+        return min(1.0, score)
+# Test
+if __name__ == "__main__":
+    from reproagent.models import LLMClient
+    llm = LLMClient()
+    analyzer = RepoAnalyzer(llm)
+    # Test with a real repo
+    state = analyzer.analyze_repo("https://github.com/pytorch/examples")
+    print(state.to_dict())

assets/loss_plot.png ADDED Viewed

assets/reward_plot.png ADDED Viewed