Spaces:

samarthnaikk
/

getgitspace

Runtime error

getgitspace / checkpoints.py

Samarth Naik

hf p1

0c87788 22 days ago

14 kB

	"""
	Checkpoint-based validation system for repository analysis.

	This module provides functionality to validate repository requirements using
	checkpoint definitions from a text file. Each checkpoint represents a requirement
	that is automatically evaluated using repository analysis and RAG capabilities.
	"""

	import os
	import logging
	from typing import List, Dict, Any, Optional
	from pathlib import Path
	import re

	from rag import Retriever, generate_response


	# Module logger
	logger = logging.getLogger('getgit.checkpoints')


	class CheckpointResult:
	    """
	    Result from evaluating a single checkpoint.

	    Attributes:
	        checkpoint: The original checkpoint text
	        passed: Whether the checkpoint passed validation
	        explanation: Detailed explanation of the result
	        evidence: Supporting files or information (empty list when none given)
	        score: Optional confidence score (0.0-1.0)
	    """

	    def __init__(
	        self,
	        checkpoint: str,
	        passed: bool,
	        explanation: str,
	        evidence: Optional[List[str]] = None,
	        score: Optional[float] = None
	    ):
	        self.checkpoint = checkpoint
	        self.passed = passed
	        self.explanation = explanation
	        # None (or an empty list) becomes a fresh list, so the default is
	        # never shared between instances.
	        self.evidence = evidence or []
	        self.score = score

	    def __repr__(self):
	        """Return a short debug representation with a checkpoint preview."""
	        preview_len = 50
	        status = "PASS" if self.passed else "FAIL"
	        preview = self.checkpoint[:preview_len]
	        # Only mark the preview as truncated when text was actually cut off.
	        if len(self.checkpoint) > preview_len:
	            preview += "..."
	        return f"CheckpointResult({status}, checkpoint='{preview}')"

	    def format_output(self) -> str:
	        """Format the result as human-readable text.

	        Returns:
	            A multi-line string ending in a newline: the status and
	            checkpoint on the first line, the explanation on the second,
	            then optional "Evidence" and "Confidence" lines.
	        """
	        status = "[PASS]" if self.passed else "[FAIL]"
	        output = f"{status} {self.checkpoint}\n"
	        output += f" {self.explanation}\n"
	        if self.evidence:
	            output += f" Evidence: {', '.join(self.evidence)}\n"
	        # A score of 0.0 is meaningful, so test against None, not truthiness.
	        if self.score is not None:
	            output += f" Confidence: {self.score:.2f}\n"
	        return output


	def load_checkpoints(file_path: str) -> List[str]:
	    """
	    Load and parse checkpoint definitions from a text file.

	    The file should contain one checkpoint per line, optionally numbered
	    ("1. ", "1) ", "1 - ", "1: "). Empty lines and lines starting with '#'
	    are ignored. A leading UTF-8 byte-order mark is tolerated.

	    Args:
	        file_path: Path to the checkpoints file

	    Returns:
	        List of checkpoint strings, in file order, with numbering removed

	    Raises:
	        FileNotFoundError: If the checkpoints file doesn't exist
	        ValueError: If the file is empty or contains no valid checkpoints

	    Example:
	        >>> checkpoints = load_checkpoints('checkpoints.txt')
	        >>> print(checkpoints[0])
	        Check if the repository has README.md
	    """
	    logger.info(f"Loading checkpoints from {file_path}")

	    if not os.path.exists(file_path):
	        raise FileNotFoundError(f"Checkpoints file not found: {file_path}")

	    # Leading list numbering: digits, optional spaces, one separator, then
	    # optional spaces. The optional spaces before the separator are what make
	    # the "1 - text" form work.
	    numbering = re.compile(r'^\d+\s*[.)\-:]\s*')

	    checkpoints = []

	    # 'utf-8-sig' decodes plain UTF-8 as well, but drops a BOM if one is
	    # present; otherwise the BOM would hide a '#' or a number on line one.
	    with open(file_path, 'r', encoding='utf-8-sig') as f:
	        for raw_line in f:
	            line = raw_line.strip()

	            # Skip empty lines and comments
	            if not line or line.startswith('#'):
	                continue

	            checkpoint = numbering.sub('', line)

	            # A line that consisted only of numbering (e.g. "3.") is dropped.
	            if checkpoint:
	                checkpoints.append(checkpoint)
	                logger.debug(f"Loaded checkpoint {len(checkpoints)}: {checkpoint[:50]}...")

	    if not checkpoints:
	        raise ValueError(f"No valid checkpoints found in {file_path}")

	    logger.info(f"Loaded {len(checkpoints)} checkpoints")
	    return checkpoints


	def _check_file_exists(checkpoint: str, repo_path: str) -> Optional[CheckpointResult]:
	"""
	Check if a checkpoint is asking about file existence and handle it deterministically.

	Args:
	checkpoint: The checkpoint text
	repo_path: Path to the repository

	Returns:
	CheckpointResult if it's a file existence check, None otherwise
	"""
	# Pattern matching for file existence checks
	# Look for common filenames with extensions
	file_pattern = r'\b([\w\-]+\.[\w]+)\b'

	matches = re.findall(file_pattern, checkpoint)

	# Check if this is actually asking about file existence
	existence_keywords = ['check if', 'has', 'contains', 'includes', 'exists', 'present', 'available']
	is_existence_check = any(keyword in checkpoint.lower() for keyword in existence_keywords)

	if matches and is_existence_check:
	# Use the first filename found
	filename = matches[0]

	# Search for the file in the repository
	found_files = []
	for root, dirs, files in os.walk(repo_path):
	# Skip hidden directories
	dirs[:] = [d for d in dirs if not d.startswith('.')]

	for file in files:
	if file.lower() == filename.lower():
	rel_path = os.path.relpath(os.path.join(root, file), repo_path)
	found_files.append(rel_path)

	if found_files:
	return CheckpointResult(
	checkpoint=checkpoint,
	passed=True,
	explanation=f"File '{filename}' found in repository",
	evidence=found_files,
	score=1.0
	)
	else:
	return CheckpointResult(
	checkpoint=checkpoint,
	passed=False,
	explanation=f"File '{filename}' not found in repository",
	evidence=[],
	score=1.0
	)

	return None


	def evaluate_checkpoint(
	    checkpoint: str,
	    repo_path: str,
	    retriever: Retriever,
	    use_llm: bool = True,
	    api_key: Optional[str] = None,
	    model_name: str = "gemini-2.5-flash"
	) -> CheckpointResult:
	    """
	    Evaluate a single checkpoint and return result details.

	    The evaluation process:
	    1. Try deterministic checks first (e.g., file existence)
	    2. Use RAG retrieval to find relevant context
	    3. Optionally use LLM to interpret complex requirements; if the LLM step
	       raises, fall back to a decision based on the top retrieval score

	    Any other error during retrieval or scoring is logged and reported as a
	    failed result rather than raised.

	    Args:
	        checkpoint: The checkpoint requirement to evaluate
	        repo_path: Path to the repository
	        retriever: Configured Retriever instance for RAG
	        use_llm: Whether to use LLM for evaluation
	        api_key: Optional API key for LLM
	        model_name: Name of the LLM model to use

	    Returns:
	        CheckpointResult with evaluation outcome

	    Example:
	        >>> result = evaluate_checkpoint(
	        ...     "Check if README.md exists",
	        ...     "/path/to/repo",
	        ...     retriever
	        ... )
	        >>> print(result.format_output())
	    """
	    logger.info(f"Evaluating checkpoint: {checkpoint[:50]}...")

	    # Step 1: Try deterministic checks
	    file_check = _check_file_exists(checkpoint, repo_path)
	    if file_check is not None:
	        logger.info(f"Checkpoint evaluated deterministically: {'PASS' if file_check.passed else 'FAIL'}")
	        return file_check

	    # Step 2: Use RAG retrieval
	    logger.debug("Using RAG retrieval for checkpoint evaluation")
	    try:
	        results = retriever.retrieve(checkpoint, top_k=5)

	        if not results:
	            return CheckpointResult(
	                checkpoint=checkpoint,
	                passed=False,
	                explanation="No relevant information found in repository",
	                evidence=[],
	                score=0.0
	            )

	        # Evidence lists the files of the three best hits; the LLM helper is
	        # handed the content of every retrieved chunk.
	        evidence_files = [result.chunk.file_path for result in results[:3]]
	        context_chunks = [result.chunk.content for result in results]

	        # Step 3: Use LLM for interpretation if available
	        if use_llm:
	            try:
	                eval_prompt = _build_eval_prompt(checkpoint, context_chunks)
	                response = generate_response(
	                    eval_prompt,
	                    context_chunks,
	                    model_name=model_name,
	                    api_key=api_key
	                )
	                passed, explanation = _parse_llm_verdict(response)

	                # Confidence is the mean retrieval score of the top hits
	                # (at most three; results is non-empty here).
	                top_results = results[:3]
	                avg_score = sum(r.score for r in top_results) / len(top_results)

	                return CheckpointResult(
	                    checkpoint=checkpoint,
	                    passed=passed,
	                    explanation=explanation,
	                    evidence=evidence_files,
	                    score=avg_score
	                )

	            except Exception as e:
	                # Deliberate best effort: any LLM failure degrades to the
	                # retrieval-only decision below.
	                logger.warning(f"LLM evaluation failed: {e}, falling back to RAG-only")

	        # Fallback: Use retrieval scores only
	        # If top result has high score, consider it a pass
	        top_score = results[0].score
	        threshold = 0.5

	        passed = top_score >= threshold
	        explanation = f"Found relevant content (score: {top_score:.2f}). "
	        if passed:
	            explanation += f"Repository likely satisfies this requirement based on {len(results)} relevant chunks."
	        else:
	            explanation += f"Insufficient evidence found. Relevance score below threshold ({threshold})."

	        return CheckpointResult(
	            checkpoint=checkpoint,
	            passed=passed,
	            explanation=explanation,
	            evidence=evidence_files,
	            score=top_score
	        )

	    except Exception as e:
	        logger.error(f"Error evaluating checkpoint: {e}")
	        return CheckpointResult(
	            checkpoint=checkpoint,
	            passed=False,
	            explanation=f"Evaluation error: {str(e)}",
	            evidence=[],
	            score=0.0
	        )


	def _build_eval_prompt(checkpoint: str, context_chunks: List[str]) -> str:
	    """Build the LLM prompt for one checkpoint from the first three context chunks."""
	    chunk_sections = "\n".join(
	        f"--- Chunk {i} ---\n{chunk}"
	        for i, chunk in enumerate(context_chunks[:3], 1)
	    )
	    # Assembled from single-line pieces so the prompt text does not depend on
	    # how this source file is indented.
	    return (
	        "Based on the following repository context, evaluate this requirement:\n"
	        "\n"
	        f"Requirement: {checkpoint}\n"
	        "\n"
	        "Repository Context:\n"
	        f"{chunk_sections}\n"
	        "\n"
	        "Provide a clear evaluation:\n"
	        "1. Does the repository satisfy this requirement? (Yes/No)\n"
	        "2. Explain your reasoning in 1-2 sentences\n"
	        "3. If applicable, mention specific files or components that demonstrate this\n"
	        "\n"
	        "Format your response as:\n"
	        "RESULT: [Yes/No]\n"
	        "EXPLANATION: [Your explanation]\n"
	    )


	def _parse_llm_verdict(response: str) -> tuple:
	    """Extract ``(passed, explanation)`` from an LLM response in the requested format."""
	    # Prefer the explicit "RESULT: Yes/No" line; \W* tolerates brackets or
	    # markdown emphasis around the answer.
	    result_match = re.search(r'RESULT:\W*(yes|no)\b', response, re.IGNORECASE)
	    if result_match:
	        passed = result_match.group(1).lower() == "yes"
	    else:
	        # Format not followed: look for a standalone "yes" near the start.
	        passed = re.search(r'\byes\b', response[:100], re.IGNORECASE) is not None

	    # Explanation runs to the first blank line or the end of the response.
	    explanation_match = re.search(r'EXPLANATION:\s*(.+?)(?:\n\n|\Z)', response, re.DOTALL)
	    if explanation_match:
	        explanation = explanation_match.group(1).strip()
	    elif len(response) > 200:
	        explanation = response[:200] + "..."
	    else:
	        explanation = response

	    return passed, explanation


	def run_checkpoints(
	    checkpoints: List[str],
	    repo_path: str,
	    retriever: Retriever,
	    use_llm: bool = True,
	    api_key: Optional[str] = None,
	    model_name: str = "gemini-2.5-flash",
	    stop_on_failure: bool = False
	) -> List[CheckpointResult]:
	    """
	    Run all checkpoints and return aggregated results.

	    Evaluates each checkpoint sequentially and collects results.
	    Optionally stops on first failure for fast-fail scenarios. Progress and a
	    final pass count are written to the module logger.

	    Args:
	        checkpoints: List of checkpoint requirements
	        repo_path: Path to the repository
	        retriever: Configured Retriever instance
	        use_llm: Whether to use LLM for evaluation
	        api_key: Optional API key for LLM
	        model_name: Name of the LLM model to use
	        stop_on_failure: Stop processing on first failure

	    Returns:
	        List of CheckpointResult objects, one per evaluated checkpoint (shorter
	        than ``checkpoints`` when stop_on_failure ends the run early)

	    Example:
	        >>> checkpoints = load_checkpoints('checkpoints.txt')
	        >>> results = run_checkpoints(checkpoints, repo_path, retriever)
	        >>> for result in results:
	        ...     print(result.format_output())
	    """
	    separator = "=" * 70
	    total_checkpoints = len(checkpoints)

	    logger.info(f"Running {total_checkpoints} checkpoints")
	    logger.info(separator)

	    results = []

	    for i, checkpoint in enumerate(checkpoints, 1):
	        logger.info(f"\nCheckpoint {i}/{total_checkpoints}: {checkpoint[:50]}...")

	        result = evaluate_checkpoint(
	            checkpoint=checkpoint,
	            repo_path=repo_path,
	            retriever=retriever,
	            use_llm=use_llm,
	            api_key=api_key,
	            model_name=model_name
	        )
	        results.append(result)

	        # Log result
	        status = "✓ PASS" if result.passed else "✗ FAIL"
	        logger.info(f"{status}: {result.explanation[:100]}")

	        # The failing result is already in the list when the run stops.
	        if stop_on_failure and not result.passed:
	            logger.warning(f"Stopping on failure at checkpoint {i}")
	            break

	    # Summary counts only the checkpoints that were actually evaluated.
	    passed_count = sum(1 for r in results if r.passed)
	    total = len(results)
	    logger.info("\n" + separator)
	    logger.info(f"Checkpoint Summary: {passed_count}/{total} passed")
	    logger.info(separator)

	    return results


	def format_results_summary(results: List[CheckpointResult]) -> str:
	    """
	    Format checkpoint results as a summary report.

	    The report has a title banner, one numbered entry per result (using each
	    result's ``format_output()``), and a summary with total, passed, failed
	    and pass-rate figures.

	    Args:
	        results: List of CheckpointResult objects

	    Returns:
	        Formatted summary string (lines joined with newlines, no trailing newline)
	    """
	    rule = "=" * 70

	    output = [rule, "CHECKPOINT VALIDATION RESULTS", rule, ""]
	    output.extend(
	        f"{i}. {result.format_output()}" for i, result in enumerate(results, 1)
	    )

	    # Summary statistics
	    total = len(results)
	    passed = sum(1 for r in results if r.passed)
	    failed = total - passed
	    # An empty result list reports 0.0% instead of dividing by zero.
	    pass_rate = (passed / total * 100) if total else 0

	    output.extend([
	        rule,
	        "SUMMARY",
	        rule,
	        f"Total Checkpoints: {total}",
	        f"Passed: {passed}",
	        f"Failed: {failed}",
	        f"Pass Rate: {pass_rate:.1f}%",
	        rule,
	    ])

	    return "\n".join(output)