diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..15631712576c7d1250b4f3939be87ca8a34e3af9 --- /dev/null +++ b/app.py @@ -0,0 +1,23 @@ +"""Hugging Face Spaces entry point for CodeRAG. + +This file is used by Hugging Face Spaces to launch the Gradio demo. +It's configured to work without GPU (embeddings on CPU, LLM via Groq). +""" + +import os + +# Configure for HF Spaces environment +os.environ.setdefault("MODEL_LLM_PROVIDER", "groq") +os.environ.setdefault("MODEL_EMBEDDING_DEVICE", "cpu") + +# Use HF Spaces secrets for API key +if "GROQ_API_KEY" in os.environ and "MODEL_LLM_API_KEY" not in os.environ: + os.environ["MODEL_LLM_API_KEY"] = os.environ["GROQ_API_KEY"] + +# Import and launch the Gradio app +from coderag.ui.app import create_gradio_app + +demo = create_gradio_app() + +if __name__ == "__main__": + demo.launch() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b25839566749076c92b72fa4c7d6ba65b4254887 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +# Requirements for Hugging Face Spaces +# Minimal dependencies for the Gradio demo + +fastapi>=0.115.0 +uvicorn[standard]>=0.32.0 +pydantic>=2.10.0 +pydantic-settings>=2.6.0 +gradio>=4.44.0 +transformers>=4.46.0 +sentence-transformers>=3.3.0 +chromadb>=0.5.0 +tree-sitter>=0.23.0 +tree-sitter-python>=0.23.0 +gitpython>=3.1.0 +python-dotenv>=1.0.0 +structlog>=24.4.0 +httpx>=0.27.0 +openai>=1.50.0 + +# Note: torch is pre-installed on HF Spaces +# Note: accelerate and bitsandbytes not needed for CPU-only inference diff --git a/src/coderag/__init__.py b/src/coderag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e55695a911a78ae94a1b7e5eacfa29ce7b5a2e1d --- /dev/null +++ b/src/coderag/__init__.py @@ -0,0 +1,3 @@ +"""CodeRAG: RAG-based Q&A system for code repositories with verifiable citations.""" + +__version__ = "0.1.0" diff --git 
a/src/coderag/__pycache__/__init__.cpython-313.pyc b/src/coderag/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..388c5f277fe21e908ab094192ed63740955d2c49 Binary files /dev/null and b/src/coderag/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/coderag/__pycache__/cli.cpython-313.pyc b/src/coderag/__pycache__/cli.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b028ba700b63289421cd516fe60dbd9ca1a82405 Binary files /dev/null and b/src/coderag/__pycache__/cli.cpython-313.pyc differ diff --git a/src/coderag/__pycache__/config.cpython-313.pyc b/src/coderag/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..245548ea151f413006fe9495a42446b03fa16112 Binary files /dev/null and b/src/coderag/__pycache__/config.cpython-313.pyc differ diff --git a/src/coderag/__pycache__/logging.cpython-313.pyc b/src/coderag/__pycache__/logging.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e2606dd6d78a57166e0d27c9cf3dbbd2470bd764 Binary files /dev/null and b/src/coderag/__pycache__/logging.cpython-313.pyc differ diff --git a/src/coderag/__pycache__/main.cpython-313.pyc b/src/coderag/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90fd976eed4abbc8362f68570214f727e0049eee Binary files /dev/null and b/src/coderag/__pycache__/main.cpython-313.pyc differ diff --git a/src/coderag/api/__init__.py b/src/coderag/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7f28268fa660f34efcc7ca878169b674b374222c --- /dev/null +++ b/src/coderag/api/__init__.py @@ -0,0 +1,5 @@ +"""API module: REST endpoints for programmatic access.""" + +from coderag.api.routes import router + +__all__ = ["router"] diff --git a/src/coderag/api/__pycache__/__init__.cpython-313.pyc b/src/coderag/api/__pycache__/__init__.cpython-313.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..de950d5559fc279e734d2e0c1cffa3634f039db2 Binary files /dev/null and b/src/coderag/api/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/coderag/api/__pycache__/routes.cpython-313.pyc b/src/coderag/api/__pycache__/routes.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab857a3b82b684056ae38dea7ca543e283e3d977 Binary files /dev/null and b/src/coderag/api/__pycache__/routes.cpython-313.pyc differ diff --git a/src/coderag/api/__pycache__/schemas.cpython-313.pyc b/src/coderag/api/__pycache__/schemas.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b8f401566d2b108bf20e822bf3fc1b2311d58f4 Binary files /dev/null and b/src/coderag/api/__pycache__/schemas.cpython-313.pyc differ diff --git a/src/coderag/api/routes.py b/src/coderag/api/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..ed8a3b61573410cff41a262470390ba59c4c66c6 --- /dev/null +++ b/src/coderag/api/routes.py @@ -0,0 +1,282 @@ +"""REST API routes.""" + +import json +from datetime import datetime +from typing import Optional + +from fastapi import APIRouter, HTTPException, BackgroundTasks +from fastapi.responses import JSONResponse + +from coderag.api.schemas import ( + IndexRepositoryRequest, + IndexRepositoryResponse, + QueryRequest, + QueryResponse, + ListRepositoriesResponse, + RepositoryInfo, + CitationResponse, + RetrievedChunkResponse, + ErrorResponse, +) +from coderag.config import get_settings +from coderag.generation.generator import ResponseGenerator +from coderag.indexing.embeddings import EmbeddingGenerator +from coderag.indexing.vectorstore import VectorStore +from coderag.ingestion.chunker import CodeChunker +from coderag.ingestion.filter import FileFilter +from coderag.ingestion.loader import RepositoryLoader +from coderag.ingestion.validator import GitHubURLValidator, ValidationError +from coderag.logging import get_logger +from 
coderag.models.document import Document +from coderag.models.query import Query as QueryModel +from coderag.models.repository import Repository, RepositoryStatus + +logger = get_logger(__name__) +router = APIRouter() + +# Global state (in production, use a proper database) +settings = get_settings() +repos_file = settings.data_dir / "repositories.json" +repositories: dict[str, Repository] = {} + + +def load_repositories() -> None: + """Load repositories from disk.""" + global repositories + if repos_file.exists(): + try: + data = json.loads(repos_file.read_text()) + repositories = {r["id"]: Repository.from_dict(r) for r in data} + except Exception as e: + logger.error("Failed to load repositories", error=str(e)) + + +def save_repositories() -> None: + """Save repositories to disk.""" + repos_file.parent.mkdir(parents=True, exist_ok=True) + data = [r.to_dict() for r in repositories.values()] + repos_file.write_text(json.dumps(data, indent=2)) + + +# Load on startup +load_repositories() + + +async def index_repository_task( + url: str, + repo_id: str, + branch: Optional[str], + include_patterns: Optional[list[str]], + exclude_patterns: Optional[list[str]], +) -> None: + """Background task to index a repository.""" + repo = repositories[repo_id] + + try: + # Validate and clone + validator = GitHubURLValidator() + repo_info = await validator.validate_repository(url) + branch = branch or repo_info.branch or "main" + + loader = RepositoryLoader() + repo_path = loader.clone_repository(repo_info, branch) + + repo.clone_path = repo_path + repo.status = RepositoryStatus.INDEXING + save_repositories() + + # Filter files + file_filter = FileFilter( + include_patterns=include_patterns, + exclude_patterns=exclude_patterns, + ) + files = list(file_filter.filter_files(repo_path)) + + # Load documents + documents = [] + for file_path in files: + try: + doc = Document.from_file(file_path, repo_path, repo.id) + documents.append(doc) + except Exception as e: + logger.warning("Failed 
to load file", path=str(file_path), error=str(e)) + + # Chunk + chunker = CodeChunker() + chunks = [] + for doc in documents: + for chunk in chunker.chunk_document(doc): + chunks.append(chunk) + + # Embed and store + if chunks: + vectorstore = VectorStore() + vectorstore.delete_repo_chunks(repo.id) + + embedder = EmbeddingGenerator() + embedded_chunks = embedder.embed_chunks(chunks) + vectorstore.add_chunks(embedded_chunks) + + # Update status + repo.chunk_count = len(chunks) + repo.indexed_at = datetime.now() + repo.status = RepositoryStatus.READY + save_repositories() + + logger.info("Repository indexed", repo_id=repo_id, chunks=len(chunks)) + + except Exception as e: + logger.error("Indexing failed", repo_id=repo_id, error=str(e)) + repo.status = RepositoryStatus.ERROR + repo.error_message = str(e) + save_repositories() + + +@router.post("/repos/index", response_model=IndexRepositoryResponse, status_code=202) +async def index_repository( + request: IndexRepositoryRequest, + background_tasks: BackgroundTasks, +) -> IndexRepositoryResponse: + """Index a GitHub repository.""" + # Create repository record + repo = Repository( + url=request.url, + branch=request.branch or "main", + status=RepositoryStatus.PENDING, + ) + repositories[repo.id] = repo + save_repositories() + + # Start background indexing + background_tasks.add_task( + index_repository_task, + request.url, + repo.id, + request.branch, + request.include_patterns, + request.exclude_patterns, + ) + + return IndexRepositoryResponse( + repo_id=repo.id, + status=repo.status.value, + message="Repository indexing started", + ) + + +@router.post("/query", response_model=QueryResponse) +async def query_repository(request: QueryRequest) -> QueryResponse: + """Query a repository.""" + # Check repository exists + if request.repo_id not in repositories: + raise HTTPException(status_code=404, detail="Repository not found") + + repo = repositories[request.repo_id] + if repo.status != RepositoryStatus.READY: + raise 
HTTPException( + status_code=400, + detail=f"Repository not ready (status: {repo.status.value})", + ) + + try: + # Generate response + generator = ResponseGenerator() + query = QueryModel( + question=request.question, + repo_id=request.repo_id, + top_k=request.top_k, + ) + response = generator.generate(query) + + # Convert to API schema + return QueryResponse( + answer=response.answer, + citations=[ + CitationResponse( + file_path=c.file_path, + start_line=c.start_line, + end_line=c.end_line, + ) + for c in response.citations + ], + retrieved_chunks=[ + RetrievedChunkResponse( + chunk_id=c.chunk_id, + file_path=c.file_path, + start_line=c.start_line, + end_line=c.end_line, + relevance_score=c.relevance_score, + chunk_type=c.chunk_type, + name=c.name, + content=c.content, + ) + for c in response.retrieved_chunks + ], + grounded=response.grounded, + query_id=response.query_id, + ) + + except Exception as e: + logger.error("Query failed", error=str(e)) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/repos", response_model=ListRepositoriesResponse) +async def list_repositories() -> ListRepositoriesResponse: + """List all repositories.""" + return ListRepositoriesResponse( + repositories=[ + RepositoryInfo( + id=repo.id, + url=repo.url, + branch=repo.branch, + chunk_count=repo.chunk_count, + status=repo.status.value, + indexed_at=repo.indexed_at, + error_message=repo.error_message, + ) + for repo in repositories.values() + ] + ) + + +@router.get("/repos/{repo_id}", response_model=RepositoryInfo) +async def get_repository(repo_id: str) -> RepositoryInfo: + """Get repository details.""" + if repo_id not in repositories: + raise HTTPException(status_code=404, detail="Repository not found") + + repo = repositories[repo_id] + return RepositoryInfo( + id=repo.id, + url=repo.url, + branch=repo.branch, + chunk_count=repo.chunk_count, + status=repo.status.value, + indexed_at=repo.indexed_at, + error_message=repo.error_message, + ) + + 
+@router.delete("/repos/{repo_id}") +async def delete_repository(repo_id: str) -> dict: + """Delete a repository.""" + if repo_id not in repositories: + raise HTTPException(status_code=404, detail="Repository not found") + + repo = repositories[repo_id] + + try: + # Delete from vector store + vectorstore = VectorStore() + vectorstore.delete_repo_chunks(repo_id) + + # Delete from records + del repositories[repo_id] + save_repositories() + + return {"message": f"Repository {repo.full_name} deleted"} + + except Exception as e: + logger.error("Delete failed", error=str(e)) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/src/coderag/api/schemas.py b/src/coderag/api/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..b3834e0301f43c6b435a45ebe1084be7aaf719a6 --- /dev/null +++ b/src/coderag/api/schemas.py @@ -0,0 +1,101 @@ +"""Pydantic schemas for REST API.""" + +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field, HttpUrl + + +class IndexRepositoryRequest(BaseModel): + """Request to index a repository.""" + + url: str = Field(..., description="GitHub repository URL") + branch: Optional[str] = Field(None, description="Branch name (default: main)") + include_patterns: Optional[list[str]] = Field(None, description="File patterns to include") + exclude_patterns: Optional[list[str]] = Field(None, description="File patterns to exclude") + + +class IndexRepositoryResponse(BaseModel): + """Response from indexing request.""" + + repo_id: str = Field(..., description="Repository ID") + status: str = Field(..., description="Indexing status") + message: str = Field(..., description="Status message") + + +class QueryRequest(BaseModel): + """Request to query a repository.""" + + question: str = Field(..., description="Question about the repository") + repo_id: str = Field(..., description="Repository ID to query") + top_k: int = Field(5, ge=1, le=20, description="Number of chunks to 
retrieve") + + +class CitationResponse(BaseModel): + """Citation information.""" + + file_path: str + start_line: int + end_line: int + + class Config: + from_attributes = True + + +class RetrievedChunkResponse(BaseModel): + """Retrieved chunk information.""" + + chunk_id: str + file_path: str + start_line: int + end_line: int + relevance_score: float + chunk_type: str + name: Optional[str] = None + content: str + + class Config: + from_attributes = True + + +class QueryResponse(BaseModel): + """Response from a query.""" + + answer: str = Field(..., description="Generated answer") + citations: list[CitationResponse] = Field(..., description="Citations in the answer") + retrieved_chunks: list[RetrievedChunkResponse] = Field(..., description="Evidence chunks") + grounded: bool = Field(..., description="Whether response is grounded in evidence") + query_id: str = Field(..., description="Query ID") + + +class RepositoryInfo(BaseModel): + """Repository information.""" + + id: str + url: str + branch: str + chunk_count: int + status: str + indexed_at: Optional[datetime] = None + error_message: Optional[str] = None + + +class ListRepositoriesResponse(BaseModel): + """List of repositories.""" + + repositories: list[RepositoryInfo] + + +class HealthResponse(BaseModel): + """Health check response.""" + + status: str + app: str + version: str + + +class ErrorResponse(BaseModel): + """Error response.""" + + error: str + detail: Optional[str] = None diff --git a/src/coderag/cli.py b/src/coderag/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..b9506f784fc13bf93b0ac411dbd1d742ec8d88cb --- /dev/null +++ b/src/coderag/cli.py @@ -0,0 +1,513 @@ +"""Unified CLI for CodeRAG.""" + +import json +import os +import platform +import shutil +import sys +from pathlib import Path +from typing import Optional + +import click + + +# Config directory and file +CONFIG_DIR = Path.home() / ".config" / "coderag" +CONFIG_FILE = CONFIG_DIR / "config.json" + + +def 
get_config() -> dict: + """Load configuration from config file.""" + if CONFIG_FILE.exists(): + try: + return json.loads(CONFIG_FILE.read_text()) + except Exception: + return {} + return {} + + +def save_config(config: dict) -> None: + """Save configuration to config file.""" + CONFIG_DIR.mkdir(parents=True, exist_ok=True) + CONFIG_FILE.write_text(json.dumps(config, indent=2)) + + +def get_claude_config_path() -> Optional[Path]: + """Get Claude Desktop config path based on OS.""" + system = platform.system() + + if system == "Darwin": # macOS + return Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json" + elif system == "Linux": + return Path.home() / ".config" / "Claude" / "claude_desktop_config.json" + elif system == "Windows": + appdata = os.environ.get("APPDATA", "") + if appdata: + return Path(appdata) / "Claude" / "claude_desktop_config.json" + return None + + +@click.group() +@click.version_option(package_name="coderag") +def cli(): + """CodeRAG - RAG-based Q&A system for code repositories. + + Use 'coderag setup' to configure, then 'coderag serve' to start. + For Claude Desktop integration, run 'coderag mcp-install'. + """ + pass + + +@cli.command() +@click.option("--provider", type=click.Choice(["groq", "openai", "anthropic", "openrouter", "together", "local"]), + default=None, help="LLM provider to use") +@click.option("--api-key", default=None, help="API key for the provider") +def setup(provider: Optional[str], api_key: Optional[str]): + """Interactive setup wizard for CodeRAG. + + Configures the LLM provider and API key. Configuration is saved to + ~/.config/coderag/config.json and can be overridden by environment variables. + """ + config = get_config() + + click.echo("\nšŸ”§ CodeRAG Setup\n") + + # Provider selection + if provider is None: + click.echo("Select your LLM provider:") + click.echo(" 1. groq (FREE, fast - recommended)") + click.echo(" 2. openai") + click.echo(" 3. anthropic") + click.echo(" 4. 
openrouter") + click.echo(" 5. together") + click.echo(" 6. local (requires GPU)") + + choice = click.prompt("Enter choice", type=int, default=1) + providers = {1: "groq", 2: "openai", 3: "anthropic", 4: "openrouter", 5: "together", 6: "local"} + provider = providers.get(choice, "groq") + + config["llm_provider"] = provider + + # API key (not needed for local) + if provider != "local": + if api_key is None: + api_key_urls = { + "groq": "https://console.groq.com/keys", + "openai": "https://platform.openai.com/api-keys", + "anthropic": "https://console.anthropic.com/settings/keys", + "openrouter": "https://openrouter.ai/keys", + "together": "https://api.together.xyz/settings/api-keys", + } + url = api_key_urls.get(provider, "") + if url: + click.echo(f"\nGet your API key from: {url}") + + api_key = click.prompt("Enter your API key", hide_input=True) + + config["llm_api_key"] = api_key + + # Validate API key + click.echo("\nā³ Validating API key...") + if _validate_api_key(provider, api_key): + click.echo("āœ… API key is valid!") + else: + click.echo("āš ļø Could not validate API key. It may still work.") + else: + click.echo("\nāš ļø Local mode requires a CUDA-capable GPU.") + + # Save config + save_config(config) + click.echo(f"\nāœ… Configuration saved to {CONFIG_FILE}") + + # Next steps + click.echo("\nšŸ“‹ Next steps:") + click.echo(" 1. Run 'coderag serve' to start the web interface") + click.echo(" 2. Run 'coderag mcp-install' to integrate with Claude Desktop") + click.echo(" 3. 
Run 'coderag index ' to index a repository") + + +def _validate_api_key(provider: str, api_key: str) -> bool: + """Validate API key by making a test request.""" + try: + from openai import OpenAI + + base_urls = { + "groq": "https://api.groq.com/openai/v1", + "openai": "https://api.openai.com/v1", + "openrouter": "https://openrouter.ai/api/v1", + "together": "https://api.together.xyz/v1", + } + + if provider not in base_urls: + return True # Can't validate, assume OK + + client = OpenAI(api_key=api_key, base_url=base_urls[provider]) + client.models.list() + return True + except Exception: + return False + + +@cli.command() +@click.option("--host", default="0.0.0.0", help="Host to bind to") +@click.option("--port", default=8000, type=int, help="Port to bind to") +@click.option("--reload", is_flag=True, help="Enable auto-reload for development") +def serve(host: str, port: int, reload: bool): + """Start the CodeRAG web server. + + Starts the FastAPI server with Gradio UI, REST API, and MCP endpoint. + """ + # Apply config from file to environment + _apply_config_to_env() + + import uvicorn + from coderag.main import create_app + from coderag.config import get_settings + + settings = get_settings() + app = create_app() + + click.echo(f"\nšŸš€ Starting CodeRAG server at http://{host}:{port}") + click.echo(" Press Ctrl+C to stop\n") + + uvicorn.run( + app, + host=host, + port=port, + reload=reload, + log_level=settings.server.log_level, + ) + + +@cli.command("mcp-run") +def mcp_run(): + """Run MCP server in stdio mode (for Claude Desktop). + + This command is used by Claude Desktop to communicate with CodeRAG. + You typically don't need to run this manually. 
+ """ + # Apply config from file to environment + _apply_config_to_env() + + # Suppress all output except MCP protocol + import logging + logging.basicConfig(level=logging.WARNING, stream=sys.stderr) + + import structlog + structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(logging.CRITICAL), + ) + + from coderag.mcp.server import create_mcp_server + mcp = create_mcp_server() + mcp.run(transport="stdio") + + +@cli.command("mcp-install") +@click.option("--dry-run", is_flag=True, help="Preview changes without applying") +def mcp_install(dry_run: bool): + """Configure Claude Desktop to use CodeRAG MCP. + + Automatically detects your OS and updates the Claude Desktop configuration + to include the CodeRAG MCP server. + """ + config_path = get_claude_config_path() + + if config_path is None: + click.echo("āŒ Could not determine Claude Desktop config location.") + click.echo(" Please manually add the MCP configuration.") + sys.exit(1) + + click.echo(f"\nšŸ” Claude Desktop config: {config_path}") + + # Check if Claude Desktop is installed + if not config_path.parent.exists(): + click.echo("\nāŒ Claude Desktop does not appear to be installed.") + click.echo(" Install it from: https://claude.ai/download") + sys.exit(1) + + # Load existing config or create new + if config_path.exists(): + try: + config = json.loads(config_path.read_text()) + except json.JSONDecodeError: + click.echo("āš ļø Existing config is invalid JSON. 
Creating new config.") + config = {} + else: + config = {} + + # Ensure mcpServers key exists + if "mcpServers" not in config: + config["mcpServers"] = {} + + # Find the coderag-mcp command path + coderag_path = shutil.which("coderag") + if coderag_path is None: + # Fallback to python -m + python_path = sys.executable + mcp_command = [python_path, "-m", "coderag.mcp.cli"] + else: + mcp_command = [coderag_path, "mcp-run"] + + # Prepare MCP server config + new_mcp_config = { + "command": mcp_command[0], + "args": mcp_command[1:] if len(mcp_command) > 1 else [], + } + + # Check if already configured + existing = config["mcpServers"].get("coderag") + if existing == new_mcp_config: + click.echo("\nāœ… CodeRAG MCP is already configured correctly!") + return + + # Show diff + click.echo("\nšŸ“ Changes to be made:") + if existing: + click.echo(f" Update: mcpServers.coderag") + click.echo(f" From: {json.dumps(existing)}") + click.echo(f" To: {json.dumps(new_mcp_config)}") + else: + click.echo(f" Add: mcpServers.coderag = {json.dumps(new_mcp_config)}") + + if dry_run: + click.echo("\nšŸ” Dry run - no changes made.") + return + + # Backup existing config + if config_path.exists(): + backup_path = config_path.with_suffix(".json.backup") + shutil.copy(config_path, backup_path) + click.echo(f"\nšŸ“¦ Backup saved to: {backup_path}") + + # Apply changes + config["mcpServers"]["coderag"] = new_mcp_config + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config, indent=2)) + + click.echo("\nāœ… Claude Desktop configuration updated!") + click.echo("\nāš ļø Please restart Claude Desktop to apply changes.") + + +@cli.command("index") +@click.argument("url") +@click.option("--branch", default="", help="Branch to index (default: main/master)") +def index(url: str, branch: str): + """Index a GitHub repository. + + URL: The GitHub repository URL to index. 
+ + Example: coderag index https://github.com/owner/repo + """ + # Apply config from file to environment + _apply_config_to_env() + + import asyncio + from coderag.mcp.handlers import get_mcp_handlers + + click.echo(f"\nšŸ“¦ Indexing repository: {url}") + if branch: + click.echo(f" Branch: {branch}") + + handlers = get_mcp_handlers() + + async def run_index(): + result = await handlers.index_repository(url=url, branch=branch) + return result + + result = asyncio.run(run_index()) + + if result.get("success"): + click.echo(f"\nāœ… Repository indexed successfully!") + click.echo(f" Repo ID: {result['repo_id']}") + click.echo(f" Name: {result['name']}") + click.echo(f" Files processed: {result['files_processed']}") + click.echo(f" Chunks indexed: {result['chunks_indexed']}") + click.echo(f"\n Use 'coderag query {result['repo_id'][:8]} \"your question\"' to query") + else: + click.echo(f"\nāŒ Indexing failed: {result.get('error', 'Unknown error')}") + sys.exit(1) + + +@cli.command("query") +@click.argument("repo_id") +@click.argument("question") +@click.option("--top-k", default=5, type=int, help="Number of chunks to retrieve") +@click.option("--format", "output_format", type=click.Choice(["text", "json"]), default="text", help="Output format") +def query(repo_id: str, question: str, top_k: int, output_format: str): + """Ask a question about an indexed repository. + + REPO_ID: Repository ID (full or first 8 characters) + QUESTION: Your question about the code + + Example: coderag query abc12345 "How does authentication work?" 
+ """ + # Apply config from file to environment + _apply_config_to_env() + + import asyncio + from coderag.mcp.handlers import get_mcp_handlers + + handlers = get_mcp_handlers() + + async def run_query(): + result = await handlers.query_code(repo_id=repo_id, question=question, top_k=top_k) + return result + + click.echo(f"\nšŸ” Querying: {question}\n") + result = asyncio.run(run_query()) + + if result.get("error"): + click.echo(f"āŒ Error: {result['error']}") + sys.exit(1) + + if output_format == "json": + click.echo(json.dumps(result, indent=2)) + else: + click.echo("šŸ“ Answer:\n") + click.echo(result.get("answer", "No answer generated.")) + + if result.get("citations"): + click.echo("\nšŸ“ Citations:") + for citation in result["citations"]: + click.echo(f" {citation}") + + if result.get("evidence"): + click.echo("\nšŸ“‚ Evidence:") + for chunk in result["evidence"][:3]: # Show top 3 + click.echo(f" - {chunk['file']}:{chunk['start_line']}-{chunk['end_line']} (relevance: {chunk['relevance']})") + + +@cli.command("repos") +@click.option("--format", "output_format", type=click.Choice(["text", "json"]), default="text", help="Output format") +def repos(output_format: str): + """List all indexed repositories.""" + # Apply config from file to environment + _apply_config_to_env() + + import asyncio + from coderag.mcp.handlers import get_mcp_handlers + + handlers = get_mcp_handlers() + + async def run_list(): + result = await handlers.list_repositories() + return result + + result = asyncio.run(run_list()) + + if output_format == "json": + click.echo(json.dumps(result, indent=2)) + else: + repos_list = result.get("repositories", []) + if not repos_list: + click.echo("\nšŸ“­ No repositories indexed yet.") + click.echo(" Run 'coderag index ' to index a repository.") + return + + click.echo(f"\nšŸ“š Indexed Repositories ({len(repos_list)}):\n") + for repo in repos_list: + status_icon = "āœ…" if repo["status"] == "ready" else "ā³" if repo["status"] == "indexing" else 
"āŒ" + click.echo(f" {status_icon} {repo['id'][:8]} {repo['name']} ({repo['branch']})") + click.echo(f" Chunks: {repo['chunk_count']} | Indexed: {repo.get('indexed_at', 'N/A')}") + + +@cli.command("doctor") +def doctor(): + """Diagnose common issues with CodeRAG setup. + + Checks Python version, configuration, API key validity, and system components. + """ + click.echo("\nšŸ„ CodeRAG Doctor\n") + all_ok = True + + # Check Python version + py_version = sys.version_info + if py_version >= (3, 11): + click.echo(f"āœ… Python version: {py_version.major}.{py_version.minor}.{py_version.micro}") + else: + click.echo(f"āŒ Python version: {py_version.major}.{py_version.minor}.{py_version.micro} (need 3.11+)") + all_ok = False + + # Check config file + config = get_config() + if config: + click.echo(f"āœ… Config file exists: {CONFIG_FILE}") + if config.get("llm_provider"): + click.echo(f" Provider: {config['llm_provider']}") + else: + click.echo(f"āš ļø No config file. Run 'coderag setup' to configure.") + + # Check API key + api_key = config.get("llm_api_key") or os.environ.get("MODEL_LLM_API_KEY") + provider = config.get("llm_provider") or os.environ.get("MODEL_LLM_PROVIDER", "groq") + + if provider != "local": + if api_key: + click.echo(f"āœ… API key configured (provider: {provider})") + else: + click.echo(f"āŒ No API key configured for {provider}") + all_ok = False + + # Check CUDA + try: + import torch + if torch.cuda.is_available(): + click.echo(f"āœ… CUDA available: {torch.cuda.get_device_name(0)}") + else: + click.echo("ā„¹ļø CUDA not available (CPU mode for embeddings)") + except ImportError: + click.echo("āš ļø PyTorch not installed") + all_ok = False + + # Check ChromaDB data directory + from coderag.config import get_settings + settings = get_settings() + chroma_path = settings.vectorstore.persist_directory + if chroma_path.exists(): + click.echo(f"āœ… ChromaDB directory: {chroma_path}") + else: + click.echo(f"ā„¹ļø ChromaDB directory will be created: 
{chroma_path}") + + # Check Claude Desktop + claude_config = get_claude_config_path() + if claude_config and claude_config.exists(): + try: + config_data = json.loads(claude_config.read_text()) + if "coderag" in config_data.get("mcpServers", {}): + click.echo("āœ… Claude Desktop MCP configured") + else: + click.echo("ā„¹ļø Claude Desktop installed but MCP not configured. Run 'coderag mcp-install'") + except Exception: + click.echo("āš ļø Claude Desktop config exists but could not be read") + else: + click.echo("ā„¹ļø Claude Desktop not detected") + + # Summary + if all_ok: + click.echo("\nāœ… All checks passed!") + else: + click.echo("\nāš ļø Some issues detected. See above for details.") + + +def _apply_config_to_env(): + """Apply configuration from config file to environment variables.""" + config = get_config() + + if config.get("llm_provider") and not os.environ.get("MODEL_LLM_PROVIDER"): + os.environ["MODEL_LLM_PROVIDER"] = config["llm_provider"] + + if config.get("llm_api_key") and not os.environ.get("MODEL_LLM_API_KEY"): + os.environ["MODEL_LLM_API_KEY"] = config["llm_api_key"] + + if config.get("embedding_device") and not os.environ.get("MODEL_EMBEDDING_DEVICE"): + os.environ["MODEL_EMBEDDING_DEVICE"] = config["embedding_device"] + + +def main(): + """Entry point for the CLI.""" + cli() + + +if __name__ == "__main__": + main() diff --git a/src/coderag/config.py b/src/coderag/config.py new file mode 100644 index 0000000000000000000000000000000000000000..cc476e2402d94898461189268fe53b7336c1675d --- /dev/null +++ b/src/coderag/config.py @@ -0,0 +1,154 @@ +"""Application configuration using pydantic-settings.""" + +from pathlib import Path +from typing import Optional + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class ModelSettings(BaseSettings): + """LLM and embedding model configuration.""" + + model_config = SettingsConfigDict(env_prefix="MODEL_") + + # LLM Provider: "local", "openai", "groq", 
class ModelSettings(BaseSettings):
    """LLM and embedding model configuration (env prefix ``MODEL_``)."""

    model_config = SettingsConfigDict(env_prefix="MODEL_")

    # Which LLM backend to use: "local", "openai", "groq", "anthropic",
    # "openrouter". Groq is the default: free tier, no GPU required.
    llm_provider: str = "groq"

    # Remote-provider credentials and optional custom endpoint.
    llm_api_key: Optional[str] = None
    llm_api_base: Optional[str] = None

    # Model identifier (HF checkpoint for local, model name for remote)
    # plus sampling parameters shared by both paths.
    llm_name: str = "Qwen/Qwen2.5-Coder-3B-Instruct"
    llm_max_new_tokens: int = 1024
    llm_temperature: float = 0.1
    llm_top_p: float = 0.95

    # Local-inference knobs (quantization and device placement).
    llm_use_4bit: bool = True
    llm_device_map: str = "auto"

    # Embedding model settings. Batch size is kept small for 8GB-VRAM GPUs;
    # "auto" device detects CUDA and falls back to CPU.
    embedding_name: str = "nomic-ai/nomic-embed-text-v1.5"
    embedding_dimension: int = 768
    embedding_batch_size: int = 8
    embedding_device: str = "auto"


class VectorStoreSettings(BaseSettings):
    """ChromaDB vector store configuration (env prefix ``VECTORSTORE_``)."""

    model_config = SettingsConfigDict(env_prefix="VECTORSTORE_")

    persist_directory: Path = Path("./data/chroma_db")
    collection_name: str = "coderag_chunks"
    distance_metric: str = "cosine"
    anonymized_telemetry: bool = False


class IngestionSettings(BaseSettings):
    """Repository ingestion configuration (env prefix ``INGESTION_``)."""

    model_config = SettingsConfigDict(env_prefix="INGESTION_")

    repos_cache_dir: Path = Path("./data/repos")
    max_file_size_kb: int = 500
    default_branch: str = "main"
    chunk_size: int = 1500
    chunk_overlap: int = 200

    # Hard limits that keep very large repositories tractable.
    max_files_per_repo: int = 5000
    max_total_chunks: int = 50000
    batch_size: int = 100
    stream_processing: bool = True

    # Soft thresholds at which the UI/CLI warns the user.
    warn_files_threshold: int = 1000
    warn_chunks_threshold: int = 10000

    # Source files to index.
    include_patterns: list[str] = Field(
        default_factory=lambda: ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.rs", "*.c", "*.cpp", "*.h"]
    )
    # Paths to skip: vendored deps, build output, lockfiles, and anything
    # that looks like credentials or secrets.
    exclude_patterns: list[str] = Field(
        default_factory=lambda: [
            "**/node_modules/**",
            "**/.git/**",
            "**/venv/**",
            "**/__pycache__/**",
            "**/dist/**",
            "**/build/**",
            "**/*.min.js",
            "**/*.min.css",
            "**/package-lock.json",
            "**/yarn.lock",
            "**/poetry.lock",
            "**/.env",
            "**/.env.*",
            "**/credentials*",
            "**/*secret*",
            "**/*password*",
        ]
    )


class RetrievalSettings(BaseSettings):
    """Retrieval configuration (env prefix ``RETRIEVAL_``)."""

    model_config = SettingsConfigDict(env_prefix="RETRIEVAL_")

    default_top_k: int = 5
    max_top_k: int = 20
    similarity_threshold: float = 0.3


class ServerSettings(BaseSettings):
    """Server configuration (env prefix ``SERVER_``)."""

    model_config = SettingsConfigDict(env_prefix="SERVER_")

    host: str = "0.0.0.0"
    port: int = 8000
    reload: bool = False
    workers: int = 1
    log_level: str = "info"


class Settings(BaseSettings):
    """Main application settings, aggregating all sub-settings.

    Values load from the environment and an optional ``.env`` file; unknown
    keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    app_name: str = "CodeRAG"
    app_version: str = "0.1.0"
    debug: bool = False
    data_dir: Path = Path("./data")

    models: ModelSettings = Field(default_factory=ModelSettings)
    vectorstore: VectorStoreSettings = Field(default_factory=VectorStoreSettings)
    ingestion: IngestionSettings = Field(default_factory=IngestionSettings)
    retrieval: RetrievalSettings = Field(default_factory=RetrievalSettings)
    server: ServerSettings = Field(default_factory=ServerSettings)

    def ensure_directories(self) -> None:
        """Create all required data directories (idempotent)."""
        required = (
            self.data_dir,
            self.vectorstore.persist_directory,
            self.ingestion.repos_cache_dir,
        )
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)


# Process-wide settings singleton, built lazily by get_settings().
_settings: Optional[Settings] = None


def get_settings() -> Settings:
    """Return the cached Settings instance, creating it on first call."""
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings()
    _settings.ensure_directories()
    return _settings
--- /dev/null +++ b/src/coderag/generation/__init__.py @@ -0,0 +1,7 @@ +"""Generation module: LLM inference and response generation with citations.""" + +from coderag.generation.generator import ResponseGenerator +from coderag.generation.prompts import SYSTEM_PROMPT, build_prompt +from coderag.generation.citations import CitationParser + +__all__ = ["ResponseGenerator", "SYSTEM_PROMPT", "build_prompt", "CitationParser"] diff --git a/src/coderag/generation/__pycache__/__init__.cpython-313.pyc b/src/coderag/generation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c937047cc779b4ac34040b288c9aaff16be63469 Binary files /dev/null and b/src/coderag/generation/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/coderag/generation/__pycache__/citations.cpython-313.pyc b/src/coderag/generation/__pycache__/citations.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72540ed1c732105424dc27bb8a26c596c3ae146b Binary files /dev/null and b/src/coderag/generation/__pycache__/citations.cpython-313.pyc differ diff --git a/src/coderag/generation/__pycache__/generator.cpython-313.pyc b/src/coderag/generation/__pycache__/generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e3dbb176cc70df7c81e10b6a4d36e0efd828cb8 Binary files /dev/null and b/src/coderag/generation/__pycache__/generator.cpython-313.pyc differ diff --git a/src/coderag/generation/__pycache__/prompts.cpython-313.pyc b/src/coderag/generation/__pycache__/prompts.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f2fd1016d4cf8bf87fe13cda058901fdaed337b Binary files /dev/null and b/src/coderag/generation/__pycache__/prompts.cpython-313.pyc differ diff --git a/src/coderag/generation/citations.py b/src/coderag/generation/citations.py new file mode 100644 index 0000000000000000000000000000000000000000..609718468ed9e9b29fe6cb5d3c3f4ac15d45544e --- 
class CitationParser:
    """Parses and validates ``[file:start-end]`` citations in LLM output."""

    # Matches citations of the form [file.py:10-20] or [path/to/file.py:10-20].
    CITATION_PATTERN = re.compile(r"\[([^\]]+):(\d+)-(\d+)\]")

    def parse_citations(self, text: str) -> list[Citation]:
        """Extract every citation occurring in *text*.

        Args:
            text: Text that may contain bracketed citations.

        Returns:
            Citation objects in order of appearance.
        """
        return [
            Citation(
                file_path=match.group(1),
                start_line=int(match.group(2)),
                end_line=int(match.group(3)),
            )
            for match in self.CITATION_PATTERN.finditer(text)
        ]

    def validate_citation(self, citation: Citation, available_files: set[str]) -> bool:
        """Return True when the citation points at a known file."""
        return citation.file_path in available_files

    def validate_citations(
        self,
        citations: list[Citation],
        available_files: set[str],
    ) -> tuple[list[Citation], list[Citation]]:
        """Split citations into those that reference known files and the rest.

        Returns:
            Tuple of (valid_citations, invalid_citations).
        """
        valid: list[Citation] = []
        invalid: list[Citation] = []
        for c in citations:
            target = valid if self.validate_citation(c, available_files) else invalid
            target.append(c)
        return valid, invalid

    def format_citation(self, file_path: str, start_line: int, end_line: int) -> str:
        """Render a citation in the canonical bracketed format."""
        return f"[{file_path}:{start_line}-{end_line}]"

    def has_citations(self, text: str) -> bool:
        """Return True when *text* contains at least one citation."""
        return self.CITATION_PATTERN.search(text) is not None

    def count_citations(self, text: str) -> int:
        """Return the number of citations present in *text*."""
        return sum(1 for _ in self.CITATION_PATTERN.finditer(text))

    def extract_unique_files(self, citations: list[Citation]) -> set[str]:
        """Return the distinct file paths referenced by *citations*."""
        return {c.file_path for c in citations}
class ResponseGenerator:
    """Generates grounded responses using local or remote LLMs.

    ``generate()`` retrieves relevant chunks via the Retriever, prompts the
    configured LLM, parses citations out of the answer, and flags the
    response as grounded only when at least one citation is present.
    Remote providers are reached through the OpenAI-compatible client; a
    local HF model is loaded lazily on first use.
    """

    def __init__(
        self,
        retriever: Optional[Retriever] = None,
    ) -> None:
        """Initialize with an optional pre-built Retriever (one is created otherwise)."""
        self.settings = get_settings()
        self.retriever = retriever or Retriever()
        self.citation_parser = CitationParser()

        self.provider = self.settings.models.llm_provider.lower()
        # Lazily created resources: remote API client or local model/tokenizer.
        self._client = None
        self._local_model = None
        self._local_tokenizer = None

        logger.info("ResponseGenerator initialized", provider=self.provider)

    def _get_api_client(self):
        """Get or create the OpenAI-compatible client for remote providers.

        Also resolves ``self.model_name``: when the configured model is the
        local-model default (a ``Qwen/`` checkpoint), it is replaced by the
        provider's default hosted model.

        Raises:
            ValueError: If no API key is configured or the provider is unknown
                and no custom API base was supplied.
        """
        if self._client is not None:
            return self._client

        import httpx
        from openai import OpenAI

        api_key = self.settings.models.llm_api_key
        if not api_key:
            raise ValueError(f"API key required for provider: {self.provider}")

        # Provider-specific endpoints and default models.
        # NOTE(review): the "anthropic" entry assumes an OpenAI-compatible
        # chat endpoint at this base URL — confirm before relying on it.
        provider_configs = {
            "openai": {
                "base_url": "https://api.openai.com/v1",
                "default_model": "gpt-4o-mini",
            },
            "groq": {
                "base_url": "https://api.groq.com/openai/v1",
                "default_model": "llama-3.3-70b-versatile",
            },
            "anthropic": {
                "base_url": "https://api.anthropic.com/v1",
                "default_model": "claude-3-5-sonnet-20241022",
            },
            "openrouter": {
                "base_url": "https://openrouter.ai/api/v1",
                "default_model": "anthropic/claude-3.5-sonnet",
            },
            "together": {
                "base_url": "https://api.together.xyz/v1",
                "default_model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
            },
        }

        config = provider_configs.get(self.provider, {})
        base_url = self.settings.models.llm_api_base or config.get("base_url")

        if not base_url:
            raise ValueError(f"Unknown provider: {self.provider}")

        # llm_name defaults to a local Qwen checkpoint; when talking to a
        # remote provider, substitute that provider's default hosted model.
        if self.settings.models.llm_name.startswith("Qwen/"):
            self.model_name = config.get("default_model", self.settings.models.llm_name)
        else:
            self.model_name = self.settings.models.llm_name

        self._client = OpenAI(
            api_key=api_key,
            base_url=base_url,
            http_client=httpx.Client(timeout=120.0),
        )

        logger.info("API client created", provider=self.provider, model=self.model_name)
        return self._client

    def _load_local_model(self):
        """Load the local model and tokenizer with transformers (idempotent).

        Raises:
            RuntimeError: If no CUDA GPU is available — local inference
                requires one; the message suggests the free Groq fallback.
        """
        if self._local_model is not None:
            return

        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

        if not torch.cuda.is_available():
            raise RuntimeError(
                "Local LLM requires a CUDA-capable GPU. Options:\n"
                "  1. Use a cloud provider (free): MODEL_LLM_PROVIDER=groq\n"
                "     Get API key at: https://console.groq.com/keys\n"
                "  2. Install CUDA and a compatible GPU"
            )

        logger.info("Loading local LLM", model=self.settings.models.llm_name)

        # Optional NF4 4-bit quantization to fit consumer GPUs.
        if self.settings.models.llm_use_4bit:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
            )
        else:
            bnb_config = None

        self._local_tokenizer = AutoTokenizer.from_pretrained(
            self.settings.models.llm_name,
            trust_remote_code=True,
        )

        self._local_model = AutoModelForCausalLM.from_pretrained(
            self.settings.models.llm_name,
            quantization_config=bnb_config,
            device_map=self.settings.models.llm_device_map,
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )

        logger.info("Local LLM loaded successfully")

    def generate(self, query: Query) -> Response:
        """Generate a grounded Response for *query*.

        Returns:
            A Response whose ``grounded`` flag is True only when the answer
            contains at least one parsed citation.
        """
        # Retrieve relevant chunks and the formatted prompt context.
        chunks, context = self.retriever.retrieve_with_context(
            query.question,
            query.repo_id,
            query.top_k,
        )

        # Nothing retrieved: return the canned refusal, ungrounded.
        if not chunks:
            return Response(
                answer=build_no_context_response(),
                citations=[],
                retrieved_chunks=[],
                grounded=False,
                query_id=query.id,
            )

        prompt = build_prompt(query.question, context)

        if self.provider == "local":
            answer = self._generate_local(prompt)
        else:
            answer = self._generate_api(prompt)

        citations = self.citation_parser.parse_citations(answer)

        # Grounded iff the model actually cited retrieved locations.
        grounded = len(citations) > 0 and len(chunks) > 0

        return Response(
            answer=answer,
            citations=citations,
            retrieved_chunks=chunks,
            grounded=grounded,
            query_id=query.id,
        )

    def _generate_api(self, prompt: str) -> str:
        """Generate a completion via the remote OpenAI-compatible API."""
        client = self._get_api_client()

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]

        response = client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            max_tokens=self.settings.models.llm_max_new_tokens,
            temperature=self.settings.models.llm_temperature,
            top_p=self.settings.models.llm_top_p,
        )

        # Some providers return None content (e.g. filtered output); avoid
        # crashing on .strip() in that case.
        content = response.choices[0].message.content
        return (content or "").strip()

    def _generate_local(self, prompt: str) -> str:
        """Generate a completion with the locally loaded model."""
        import torch

        self._load_local_model()

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]

        text = self._local_tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = self._local_tokenizer(text, return_tensors="pt").to(self._local_model.device)

        with torch.no_grad():
            outputs = self._local_model.generate(
                **inputs,
                max_new_tokens=self.settings.models.llm_max_new_tokens,
                temperature=self.settings.models.llm_temperature,
                top_p=self.settings.models.llm_top_p,
                do_sample=True,
                pad_token_id=self._local_tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, not the echoed prompt.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        response = self._local_tokenizer.decode(generated, skip_special_tokens=True)

        return response.strip()

    def unload(self) -> None:
        """Unload local models and free GPU memory (no-op for API providers)."""
        if self._local_model is not None:
            del self._local_model
            self._local_model = None
        if self._local_tokenizer is not None:
            del self._local_tokenizer
            self._local_tokenizer = None

        # torch may be absent in API-only deployments; unload must not crash.
        try:
            import torch
        except ImportError:
            pass
        else:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        logger.info("Models unloaded")
# System prompt that enforces grounded, citation-backed answers and an
# explicit refusal when the retrieved chunks are off-topic.
SYSTEM_PROMPT = """You are a code assistant that answers questions about a repository.

CRITICAL RULES - YOU MUST FOLLOW THESE:

1. FIRST, check if the retrieved chunks are RELEVANT to the question being asked.
   - If the chunks discuss completely different topics than the question, respond:
     "I could not find information about this in the indexed repository."
   - Do NOT try to make connections that don't exist.

2. Only answer based on EXPLICIT information in the provided code chunks.
   - Every claim MUST have a citation: [file_path:start_line-end_line]
   - If you cannot cite it, do NOT say it.

3. NEVER HALLUCINATE:
   - Do NOT invent code, functions, files, or behaviors
   - Do NOT answer questions about topics not in the chunks (e.g., if asked about "food inventory" but chunks are about "code embeddings", say you don't have that information)
   - Do NOT make assumptions about what the code might do

4. When to refuse:
   - The question is about something not covered in the chunks
   - The chunks are about a completely different topic
   - You would need to guess or speculate

CITATION FORMAT: [file_path:start_line-end_line]
Example: [src/auth.py:45-78]

RESPONSE FORMAT:
- Start with a direct answer IF AND ONLY IF the chunks contain relevant information
- Include citations inline with every factual statement
- If showing code, quote it exactly from the chunks"""


def build_prompt(question: str, context: str) -> str:
    """Assemble the user prompt from retrieved context and the question.

    Args:
        question: The user's question.
        context: Retrieved code chunks, already formatted for the model.

    Returns:
        The complete prompt text ending with an empty "Answer" section for
        the model to fill in.
    """
    sections = (
        "Based on the following code chunks from the repository, answer the question.",
        "## Retrieved Code Chunks",
        context,
        "## Question",
        question,
        "## Answer",
        "",
    )
    return "\n\n".join(sections)


def build_no_context_response() -> str:
    """Canned refusal used when retrieval finds nothing relevant."""
    return "I could not find information about this in the indexed repository."


def build_clarification_prompt(question: str, ambiguities: list[str]) -> str:
    """Ask the user to resolve listed ambiguities before answering."""
    bullet_lines = "\n".join("- " + item for item in ambiguities)
    return (
        f'Your question "{question}" is ambiguous. Could you clarify:\n'
        "\n"
        f"{bullet_lines}\n"
        "\n"
        "Please provide more specific details so I can give you an accurate answer."
    )
class EmbeddingGenerator:
    """Generates embeddings using nomic-embed-text v1.5.

    The SentenceTransformer model is loaded lazily on first use; settings
    provide defaults for model name, device, and batch size.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        batch_size: Optional[int] = None,
    ) -> None:
        """Initialize, falling back to configured defaults for any omitted argument."""
        settings = get_settings()
        self.model_name = model_name or settings.models.embedding_name
        self.device = self._resolve_device(device or settings.models.embedding_device)
        self.batch_size = batch_size or settings.models.embedding_batch_size
        self._model: Optional[SentenceTransformer] = None

    def _resolve_device(self, device: str) -> str:
        """Resolve the requested device, falling back to CPU when CUDA is absent."""
        if device == "auto":
            return "cuda" if torch.cuda.is_available() else "cpu"
        if device == "cuda" and not torch.cuda.is_available():
            logger.warning("CUDA not available, falling back to CPU for embeddings")
            return "cpu"
        return device

    @property
    def model(self) -> SentenceTransformer:
        # Lazy-load on first access.
        if self._model is None:
            self._load_model()
        return self._model

    def _load_model(self) -> None:
        """Load the SentenceTransformer checkpoint onto the resolved device."""
        logger.info("Loading embedding model", model=self.model_name, device=self.device)
        self._model = SentenceTransformer(
            self.model_name,
            device=self.device,
            trust_remote_code=True,
        )
        logger.info("Embedding model loaded")

    @staticmethod
    def _task_prefix(is_query: bool) -> str:
        """Return the nomic-embed task prefix for queries vs. documents."""
        return "search_query" if is_query else "search_document"

    def generate_embedding(self, text: str, is_query: bool = False) -> list[float]:
        """Embed one text, returning a normalized vector as a plain list."""
        prefixed = f"{self._task_prefix(is_query)}: {text}"
        vector = self.model.encode(prefixed, convert_to_numpy=True, normalize_embeddings=True)
        return vector.tolist()

    def generate_embeddings(
        self,
        texts: list[str],
        is_query: bool = False,
        show_progress: bool = True,
    ) -> list[list[float]]:
        """Embed many texts in batches, returning normalized vectors as lists."""
        prefix = self._task_prefix(is_query)
        prefixed = [f"{prefix}: {t}" for t in texts]

        vectors = self.model.encode(
            prefixed,
            batch_size=self.batch_size,
            convert_to_numpy=True,
            normalize_embeddings=True,
            show_progress_bar=show_progress,
        )
        return vectors.tolist()

    def embed_chunks(
        self,
        chunks: list[Chunk],
        show_progress: bool = True,
    ) -> list[Chunk]:
        """Embed every chunk in place (sets ``chunk.embedding``) and return the list."""
        if not chunks:
            return []

        logger.info("Generating embeddings", num_chunks=len(chunks))

        texts = [self._chunk_to_text(c) for c in chunks]
        vectors = self.generate_embeddings(texts, is_query=False, show_progress=show_progress)
        for chunk, vector in zip(chunks, vectors):
            chunk.embedding = vector

        logger.info("Embeddings generated", num_chunks=len(chunks))
        return chunks

    def embed_chunks_iter(
        self,
        chunks: Iterator[Chunk],
        batch_size: Optional[int] = None,
    ) -> Iterator[Chunk]:
        """Stream chunks through embedding in batches, yielding as each batch completes."""
        effective_batch = batch_size or self.batch_size
        pending: list[Chunk] = []

        for chunk in chunks:
            pending.append(chunk)
            if len(pending) >= effective_batch:
                yield from self._embed_batch(pending)
                pending = []

        # Flush the final partial batch.
        if pending:
            yield from self._embed_batch(pending)

    def _embed_batch(self, batch: list[Chunk]) -> Iterator[Chunk]:
        """Embed one batch in place and yield its chunks."""
        texts = [self._chunk_to_text(c) for c in batch]
        vectors = self.generate_embeddings(texts, is_query=False, show_progress=False)
        for chunk, vector in zip(batch, vectors):
            chunk.embedding = vector
            yield chunk

    def _chunk_to_text(self, chunk: Chunk) -> str:
        """Render a chunk as embedding input: type/name, signature, docstring, path, code."""
        parts: list[str] = []
        if chunk.name:
            parts.append(f"{chunk.chunk_type.value}: {chunk.name}")
        if chunk.metadata.signature:
            parts.append(f"Signature: {chunk.metadata.signature}")
        if chunk.metadata.docstring:
            # Cap the docstring so one chunk can't dominate the input.
            parts.append(f"Docstring: {chunk.metadata.docstring[:200]}")
        parts.append(f"File: {chunk.file_path}")
        parts.append(chunk.content)
        return "\n".join(parts)

    def unload(self) -> None:
        """Drop the model and release cached GPU memory."""
        if self._model is not None:
            del self._model
            self._model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        logger.info("Embedding model unloaded")
class VectorStore:
    """ChromaDB vector store for chunk storage and retrieval.

    Client and collection are created lazily; all chunk metadata is flattened
    to the str/int/float/bool values ChromaDB accepts.
    """

    def __init__(
        self,
        persist_directory: Optional[Path] = None,
        collection_name: Optional[str] = None,
    ) -> None:
        """Initialize with optional overrides for the configured store location."""
        settings = get_settings()
        self.persist_directory = persist_directory or settings.vectorstore.persist_directory
        self.collection_name = collection_name or settings.vectorstore.collection_name
        self._client: Optional[chromadb.PersistentClient] = None
        self._collection: Optional[chromadb.Collection] = None

    @property
    def client(self) -> chromadb.PersistentClient:
        # Lazy client creation on first access.
        if self._client is None:
            self._init_client()
        return self._client

    @property
    def collection(self) -> chromadb.Collection:
        # Lazy collection creation on first access.
        if self._collection is None:
            self._init_collection()
        return self._collection

    def _init_client(self) -> None:
        """Create the persistent ChromaDB client (telemetry disabled)."""
        logger.info("Initializing ChromaDB", path=str(self.persist_directory))
        self.persist_directory.mkdir(parents=True, exist_ok=True)
        self._client = chromadb.PersistentClient(
            path=str(self.persist_directory),
            settings=Settings(anonymized_telemetry=False),
        )

    def _init_collection(self) -> None:
        """Get or create the chunk collection using cosine distance."""
        self._collection = self.client.get_or_create_collection(
            name=self.collection_name,
            metadata={"hnsw:space": "cosine"},
        )
        logger.info("Collection initialized", name=self.collection_name)

    def add_chunks(self, chunks: list[Chunk]) -> int:
        """Add embedded chunks to the collection.

        Chunks without an embedding are skipped entirely. (Previously only the
        embeddings list was filtered, which misaligned the parallel
        ids/embeddings/documents/metadatas lists passed to ChromaDB.)

        Returns:
            Number of chunks actually added.
        """
        if not chunks:
            return 0

        # Filter un-embedded chunks up front so every parallel list below
        # stays aligned.
        embedded = [c for c in chunks if c.embedding]
        skipped = len(chunks) - len(embedded)
        if skipped:
            logger.warning("Skipping chunks without embeddings", count=skipped)
        if not embedded:
            return 0

        ids = [c.id for c in embedded]
        embeddings = [c.embedding for c in embedded]
        documents = [c.content for c in embedded]

        # ChromaDB metadata values must be str/int/float/bool: drop the
        # embedding (stored separately), content (stored as document), and
        # any None values.
        cleaned_metadatas = []
        for chunk in embedded:
            meta = chunk.to_dict()
            meta.pop("embedding", None)
            meta.pop("content", None)
            cleaned_metadatas.append({k: v for k, v in meta.items() if v is not None})

        self.collection.add(
            ids=ids,
            embeddings=embeddings,
            documents=documents,
            metadatas=cleaned_metadatas,
        )

        logger.info("Chunks added to vector store", count=len(embedded))
        return len(embedded)

    def query(
        self,
        query_embedding: list[float],
        repo_id: str,
        top_k: int = 5,
        similarity_threshold: float = 0.0,
    ) -> list[tuple[Chunk, float]]:
        """Find the chunks most similar to *query_embedding* within one repo.

        Returns:
            (chunk, similarity) pairs with similarity >= similarity_threshold,
            where similarity = 1 - cosine distance (collection uses cosine).
        """
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            where={"repo_id": repo_id},
            include=["documents", "metadatas", "distances"],
        )

        chunks_with_scores = []
        if results["ids"] and results["ids"][0]:
            for i, chunk_id in enumerate(results["ids"][0]):
                # ChromaDB returns distances; convert to similarity for cosine.
                distance = results["distances"][0][i]
                similarity = 1 - distance

                if similarity >= similarity_threshold:
                    metadata = results["metadatas"][0][i]
                    metadata["id"] = chunk_id
                    metadata["content"] = results["documents"][0][i]
                    chunk = Chunk.from_dict(metadata)
                    chunks_with_scores.append((chunk, similarity))

        return chunks_with_scores

    def delete_repo_chunks(self, repo_id: str) -> int:
        """Delete all chunks belonging to a repository; return how many."""
        # include=[] fetches only ids, which is all deletion needs.
        results = self.collection.get(where={"repo_id": repo_id}, include=[])

        if results["ids"]:
            self.collection.delete(ids=results["ids"])
            count = len(results["ids"])
            logger.info("Deleted repo chunks", repo_id=repo_id, count=count)
            return count
        return 0

    def delete_file_chunks(self, repo_id: str, file_path: str) -> int:
        """Delete chunks for a specific file in a repository (for incremental updates)."""
        results = self.collection.get(
            where={"$and": [{"repo_id": repo_id}, {"file_path": file_path}]},
            include=[],
        )

        if results["ids"]:
            self.collection.delete(ids=results["ids"])
            count = len(results["ids"])
            logger.info("Deleted file chunks", repo_id=repo_id, file_path=file_path, count=count)
            return count
        return 0

    def get_indexed_files(self, repo_id: str) -> set[str]:
        """Return the set of file paths indexed for a repository."""
        results = self.collection.get(
            where={"repo_id": repo_id},
            include=["metadatas"],
        )

        files: set[str] = set()
        if results["metadatas"]:
            for metadata in results["metadatas"]:
                if "file_path" in metadata:
                    files.add(metadata["file_path"])
        return files

    def get_repo_chunk_count(self, repo_id: str) -> int:
        """Return the number of chunks stored for a repository."""
        results = self.collection.get(where={"repo_id": repo_id}, include=[])
        return len(results["ids"]) if results["ids"] else 0

    def get_all_repo_ids(self) -> list[str]:
        """Return the distinct repo_ids present in the collection."""
        results = self.collection.get(include=["metadatas"])
        repo_ids: set[str] = set()
        if results["metadatas"]:
            for metadata in results["metadatas"]:
                if "repo_id" in metadata:
                    repo_ids.add(metadata["repo_id"])
        return list(repo_ids)

    def clear(self) -> None:
        """Drop the whole collection; it will be recreated lazily on next use."""
        self.client.delete_collection(self.collection_name)
        self._collection = None
        logger.info("Collection cleared", name=self.collection_name)
@dataclass
class ChunkerConfig:
    """Chunker configuration (sizes are measured in characters, not tokens)."""
    chunk_size: int = 1500      # target maximum characters per text chunk
    chunk_overlap: int = 200    # characters shared between consecutive text chunks
    min_chunk_size: int = 50    # stripped chunks shorter than this are dropped


class CodeChunker:
    """Chunks code files into semantic units.

    Python documents are split into function/class/method chunks via
    Tree-sitter; other languages (or a missing Tree-sitter install) fall
    back to overlapping line-based text chunking.
    """

    def __init__(self, config: Optional[ChunkerConfig] = None) -> None:
        """Build a chunker, defaulting sizes from application settings."""
        settings = get_settings()
        self.config = config or ChunkerConfig(
            chunk_size=settings.ingestion.chunk_size,
            chunk_overlap=settings.ingestion.chunk_overlap,
        )
        self._tree_sitter_available = self._check_tree_sitter()

    def _check_tree_sitter(self) -> bool:
        """Return True when the tree-sitter Python grammar is importable."""
        try:
            import tree_sitter_python  # noqa: F401
            return True
        except ImportError:
            logger.warning("Tree-sitter not available, using text chunking")
            return False

    def chunk_document(self, document: Document) -> Iterator[Chunk]:
        """Yield chunks for a single document, semantic when possible."""
        if document.language == "python" and self._tree_sitter_available:
            yield from self._chunk_python(document)
        else:
            yield from self._chunk_text(document)

    def _chunk_python(self, document: Document) -> Iterator[Chunk]:
        """Parse Python source with Tree-sitter; fall back to text on any failure."""
        try:
            import tree_sitter_python as tspython
            from tree_sitter import Language, Parser

            py_language = Language(tspython.language())
            parser = Parser(py_language)
            tree = parser.parse(bytes(document.content, "utf-8"))

            yield from self._extract_python_chunks(tree.root_node, document)

        except Exception as e:
            logger.warning("Tree-sitter parsing failed, falling back to text", error=str(e))
            yield from self._chunk_text(document)

    def _extract_python_chunks(self, node, document: Document) -> Iterator[Chunk]:
        """Emit one chunk per top-level function/class, plus one per method.

        NOTE(review): current tree-sitter-python emits ``async def`` as a plain
        ``function_definition`` node; ``async_function_definition`` is kept for
        compatibility with older grammars -- confirm against the pinned version.
        """
        lines = document.content.split("\n")
        semantic_types = ("function_definition", "async_function_definition")
        found_semantic = False

        for child in node.children:
            if child.type in semantic_types:
                found_semantic = True
                yield self._create_chunk_from_node(child, document, lines, ChunkType.FUNCTION)
            elif child.type == "class_definition":
                found_semantic = True
                yield self._create_chunk_from_node(child, document, lines, ChunkType.CLASS)
                # Also emit each method of the class as its own chunk.
                for class_child in child.children:
                    if class_child.type == "block":
                        for block_child in class_child.children:
                            if block_child.type in semantic_types:
                                yield self._create_chunk_from_node(
                                    block_child, document, lines, ChunkType.METHOD,
                                    parent_name=self._get_node_name(child),
                                )

        # Module with no top-level defs/classes: fall back to text chunking.
        if not found_semantic:
            yield from self._chunk_text(document)

    def _create_chunk_from_node(
        self,
        node,
        document: Document,
        lines: list[str],
        chunk_type: ChunkType,
        parent_name: Optional[str] = None,
    ) -> Chunk:
        """Build a Chunk covering a tree-sitter node's full line span."""
        start_line = node.start_point[0] + 1  # tree-sitter points are 0-based
        end_line = node.end_point[0] + 1
        content = "\n".join(lines[start_line - 1:end_line])
        name = self._get_node_name(node)
        signature = self._get_signature(node, lines)
        docstring = self._get_docstring(node, lines)

        metadata = ChunkMetadata(
            file_path=document.file_path,
            start_line=start_line,
            end_line=end_line,
            chunk_type=chunk_type,
            language=document.language,
            name=name,
            signature=signature,
            docstring=docstring,
            parent_name=parent_name,
        )

        return Chunk(content=content, metadata=metadata, repo_id=document.repo_id)

    def _get_node_name(self, node) -> Optional[str]:
        """Return the identifier of a def/class node, or None if absent."""
        for child in node.children:
            if child.type == "identifier":
                return child.text.decode("utf-8")
        return None

    def _get_signature(self, node, lines: list[str]) -> Optional[str]:
        """Return the first source line of a function definition (its signature)."""
        if node.type in ("function_definition", "async_function_definition"):
            start_line = node.start_point[0]
            return lines[start_line].strip()
        return None

    def _get_docstring(self, node, lines: list[str]) -> Optional[str]:
        """Return the raw docstring of a def/class body, quotes stripped.

        The strip uses a character set, so any leading/trailing quote chars are
        removed; string prefixes (r, b, f) are not handled.
        """
        for child in node.children:
            if child.type == "block":
                for block_child in child.children:
                    if block_child.type == "expression_statement":
                        for expr_child in block_child.children:
                            if expr_child.type == "string":
                                return expr_child.text.decode("utf-8").strip('"""\'\'\'')
        return None

    def _chunk_text(self, document: Document) -> Iterator[Chunk]:
        """Split a document into overlapping line-aligned text chunks.

        Chunk boundaries are measured in characters but snapped to whole
        lines; consecutive chunks share roughly ``chunk_overlap`` characters.
        """
        lines = document.content.split("\n")
        chunk_size = self.config.chunk_size
        overlap = self.config.chunk_overlap

        current_start = 0
        while current_start < len(lines):
            # Grow the chunk line by line until the character budget is spent.
            char_count = 0
            end_line = current_start
            while end_line < len(lines) and char_count < chunk_size:
                char_count += len(lines[end_line]) + 1  # +1 for the newline
                end_line += 1

            content = "\n".join(lines[current_start:end_line])

            if len(content.strip()) >= self.config.min_chunk_size:
                metadata = ChunkMetadata(
                    file_path=document.file_path,
                    start_line=current_start + 1,  # reported line numbers are 1-based
                    end_line=end_line,
                    chunk_type=ChunkType.TEXT,
                    language=document.language,
                )
                yield Chunk(content=content, metadata=metadata, repo_id=document.repo_id)

            if end_line >= len(lines):
                break  # final line consumed; done

            # Step back whole lines until ~`overlap` characters are reused.
            overlap_lines = 0
            overlap_chars = 0
            while overlap_lines < end_line - current_start and overlap_chars < overlap:
                overlap_chars += len(lines[end_line - 1 - overlap_lines]) + 1
                overlap_lines += 1

            # BUGFIX: guarantee forward progress. When the overlap covered the
            # whole chunk (overlap >= chunk content size), the window stalled
            # and looped forever (or silently stopped at line 0). Advance at
            # least one line instead.
            next_start = end_line - overlap_lines
            if next_start <= current_start:
                next_start = current_start + 1
            current_start = next_start

    def chunk_files(self, documents: Iterator[Document]) -> Iterator[Chunk]:
        """Chunk a stream of documents, logging per-document and total counts."""
        total_chunks = 0
        for doc in documents:
            doc_chunks = 0
            for chunk in self.chunk_document(doc):
                doc_chunks += 1
                total_chunks += 1
                yield chunk
            logger.debug("Document chunked", file=doc.file_path, chunks=doc_chunks)
        logger.info("Chunking complete", total_chunks=total_chunks)
class FileFilter:
    """Filters files for indexing using glob patterns plus size/binary checks."""

    def __init__(
        self,
        include_patterns: Optional[list[str]] = None,
        exclude_patterns: Optional[list[str]] = None,
        max_file_size_kb: Optional[int] = None,
    ) -> None:
        """Initialize the filter; unspecified options fall back to settings."""
        settings = get_settings()
        self.include_patterns = include_patterns or settings.ingestion.include_patterns
        self.exclude_patterns = exclude_patterns or settings.ingestion.exclude_patterns
        self.max_file_size = (max_file_size_kb or settings.ingestion.max_file_size_kb) * 1024

    def should_include(self, file_path: Path, repo_root: Path) -> bool:
        """Return True when the file matches an include pattern and no exclude pattern.

        Both the repo-relative path and the bare filename are tested against
        every pattern; exclusions take precedence over inclusions.
        """
        relative = str(file_path.relative_to(repo_root))
        candidates = (relative, file_path.name)

        if any(
            fnmatch.fnmatch(candidate, pattern)
            for pattern in self.exclude_patterns
            for candidate in candidates
        ):
            return False

        return any(
            fnmatch.fnmatch(candidate, pattern)
            for pattern in self.include_patterns
            for candidate in candidates
        )

    def check_file_size(self, file_path: Path) -> bool:
        """Return True when the file is small enough to index (False on stat errors)."""
        try:
            size = file_path.stat().st_size
        except OSError:
            return False
        return size <= self.max_file_size

    def is_binary(self, file_path: Path) -> bool:
        """Heuristic: a NUL byte in the first 8 KiB marks the file as binary.

        Unreadable files are reported as binary so they get skipped.
        """
        try:
            with open(file_path, "rb") as handle:
                return b"\x00" in handle.read(8192)
        except (OSError, IOError):
            return True

    def filter_files(self, repo_root: Path) -> Iterator[Path]:
        """Walk the repository tree and yield files passing every filter."""
        included_count = 0
        skipped_count = 0

        for candidate in repo_root.rglob("*"):
            if not candidate.is_file():
                continue
            if not self.should_include(candidate, repo_root):
                skipped_count += 1
                continue
            if not self.check_file_size(candidate):
                logger.debug("Skipping large file", path=str(candidate))
                skipped_count += 1
                continue
            if self.is_binary(candidate):
                logger.debug("Skipping binary file", path=str(candidate))
                skipped_count += 1
                continue
            included_count += 1
            yield candidate

        logger.info("File filtering complete", included=included_count, skipped=skipped_count)
class LoaderError(Exception):
    """Repository loading error."""
    pass


class RepositoryLoader:
    """Loads repositories from GitHub.

    Shallow-clones (depth=1, single branch) into a local cache directory laid
    out as ``<cache_dir>/<owner>/<name>``; repeat requests update in place.
    """

    def __init__(self, cache_dir: Optional[Path] = None) -> None:
        # Cache root defaults to the configured repos cache directory and is
        # created eagerly so later clone paths only need the parent dir.
        settings = get_settings()
        self.cache_dir = cache_dir or settings.ingestion.repos_cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def get_repo_path(self, repo_info: GitHubRepoInfo) -> Path:
        """Return the local cache path for a repository (owner/name layout)."""
        return self.cache_dir / repo_info.owner / repo_info.name

    def clone_repository(
        self,
        repo_info: GitHubRepoInfo,
        branch: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> Path:
        """Clone (or update) a repository, trying candidate branches in order.

        Branch preference: the explicit ``branch`` argument, then the branch
        recorded on ``repo_info``, then ``main``, then ``master``.

        Raises:
            LoaderError: when none of the candidate branches can be cloned.
        """
        repo_path = self.get_repo_path(repo_info)

        # Try branches in order: specified, repo default, main, master
        # (duplicates are skipped so each candidate is attempted once).
        branches_to_try = []
        if branch:
            branches_to_try.append(branch)
        if repo_info.branch and repo_info.branch not in branches_to_try:
            branches_to_try.append(repo_info.branch)
        if "main" not in branches_to_try:
            branches_to_try.append("main")
        if "master" not in branches_to_try:
            branches_to_try.append("master")

        if repo_path.exists():
            # Cached copy present: update it instead of re-cloning.
            # NOTE(review): only the first candidate branch is attempted on
            # the update path -- confirm that is the intended behavior.
            logger.info("Repository exists, updating", path=str(repo_path))
            return self._update_repository(repo_path, branches_to_try[0], progress_callback)

        if progress_callback:
            progress_callback("Cloning repository", 0)

        repo_path.parent.mkdir(parents=True, exist_ok=True)

        last_error = None
        for try_branch in branches_to_try:
            try:
                logger.info("Trying to clone", url=repo_info.clone_url, branch=try_branch)
                # Shallow, single-branch clone keeps the local cache small.
                Repo.clone_from(
                    repo_info.clone_url,
                    repo_path,
                    branch=try_branch,
                    depth=1,
                    single_branch=True,
                )
                if progress_callback:
                    progress_callback("Clone complete", 100)
                logger.info("Repository cloned", path=str(repo_path), branch=try_branch)
                return repo_path
            except GitCommandError as e:
                last_error = e
                logger.debug("Branch not found, trying next", branch=try_branch)
                # Clean up partial clone if any
                import shutil
                shutil.rmtree(repo_path, ignore_errors=True)
                continue

        raise LoaderError(f"Failed to clone repository (tried branches: {branches_to_try}): {last_error}")

    def _update_repository(
        self,
        repo_path: Path,
        branch: str,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> Path:
        """Fetch, checkout and pull ``branch`` in an existing local clone.

        Raises:
            LoaderError: when the update fails; the stale cache is removed
                first so the next attempt starts from a fresh clone.
        """
        try:
            repo = Repo(repo_path)
            if progress_callback:
                progress_callback("Fetching updates", 30)
            repo.remotes.origin.fetch()
            repo.git.checkout(branch)
            repo.remotes.origin.pull()
            if progress_callback:
                progress_callback("Update complete", 100)
            logger.info("Repository updated", path=str(repo_path))
            return repo_path
        except GitCommandError as e:
            # NOTE(review): despite the log text, this does NOT re-clone; it
            # deletes the cache and raises so the caller must retry -- confirm
            # callers handle LoaderError by calling clone_repository again.
            logger.warning("Update failed, re-cloning", error=str(e))
            import shutil
            shutil.rmtree(repo_path, ignore_errors=True)
            raise LoaderError(f"Failed to update, please re-clone: {e}")

    def is_cached(self, repo_info: GitHubRepoInfo) -> bool:
        """Return True when a local clone already exists for this repository."""
        return self.get_repo_path(repo_info).exists()

    def delete_cache(self, repo_info: GitHubRepoInfo) -> None:
        """Remove the cached clone for a repository, if present."""
        repo_path = self.get_repo_path(repo_info)
        if repo_path.exists():
            import shutil
            shutil.rmtree(repo_path)
            logger.info("Cache deleted", path=str(repo_path))
@dataclass
class GitHubRepoInfo:
    """Parsed GitHub repository information.

    Fields:
        owner: repository owner (user or organization)
        name: repository name, without any ``.git`` suffix
        url: canonical https URL of the repository
        branch: default branch, populated after an API accessibility check
    """

    owner: str
    name: str
    url: str
    branch: Optional[str] = None

    @property
    def full_name(self) -> str:
        """``owner/name`` form used in logs and error messages."""
        return f"{self.owner}/{self.name}"

    @property
    def clone_url(self) -> str:
        """HTTPS clone URL."""
        return f"https://github.com/{self.owner}/{self.name}.git"

    @property
    def api_url(self) -> str:
        """GitHub REST API endpoint for this repository."""
        return f"https://api.github.com/repos/{self.owner}/{self.name}"


class ValidationError(Exception):
    """URL validation error."""
    pass


class GitHubURLValidator:
    """Validates and parses GitHub repository URLs.

    Accepts https URLs, ssh (``git@``) URLs, and the ``owner/repo`` shorthand.
    """

    GITHUB_PATTERNS = [
        r"^https?://github\.com/(?P<owner>[^/]+)/(?P<name>[^/]+?)(?:\.git)?/?$",
        r"^git@github\.com:(?P<owner>[^/]+)/(?P<name>[^/]+?)(?:\.git)?$",
        r"^(?P<owner>[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38})/(?P<name>[a-zA-Z0-9._-]+)$",
    ]

    def __init__(self, timeout: float = 10.0) -> None:
        """Pre-compile URL patterns; ``timeout`` bounds API accessibility checks."""
        self.timeout = timeout
        self._patterns = [re.compile(p) for p in self.GITHUB_PATTERNS]

    def parse_url(self, url: str) -> GitHubRepoInfo:
        """Parse ``url`` into a GitHubRepoInfo.

        Raises:
            ValidationError: when the URL matches no supported form or the
                owner/name contain invalid characters.
        """
        url = url.strip()
        for pattern in self._patterns:
            match = pattern.match(url)
            if match:
                owner = match.group("owner")
                # BUGFIX: str.rstrip(".git") strips a *character set*, mangling
                # names such as "tagging" -> "taggin" (reachable through the
                # owner/repo shorthand pattern). removesuffix strips only a
                # literal trailing ".git".
                name = match.group("name").removesuffix(".git")
                if not self._is_valid_name(owner) or not self._is_valid_name(name):
                    raise ValidationError(f"Invalid owner or repository name: {url}")
                return GitHubRepoInfo(owner=owner, name=name, url=f"https://github.com/{owner}/{name}")
        raise ValidationError(f"Invalid GitHub URL: {url}. Expected: https://github.com/owner/repo")

    def _is_valid_name(self, name: str) -> bool:
        """Names must start alphanumeric, use only [A-Za-z0-9._-], and be <= 100 chars."""
        if not name or len(name) > 100:
            return False
        return bool(re.match(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$", name))

    async def validate_repository(self, url: str, check_accessibility: bool = True) -> GitHubRepoInfo:
        """Parse the URL and (optionally) verify the repo is public and reachable."""
        repo_info = self.parse_url(url)
        if check_accessibility:
            await self._check_repo_accessible(repo_info)
        logger.info("Repository validated", owner=repo_info.owner, name=repo_info.name)
        return repo_info

    async def _check_repo_accessible(self, repo_info: GitHubRepoInfo) -> None:
        """Query the GitHub API; raise ValidationError for any inaccessible repo.

        Side effect: records the repository's default branch on ``repo_info``.
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            try:
                response = await client.get(repo_info.api_url)
                if response.status_code == 404:
                    raise ValidationError(f"Repository not found: {repo_info.full_name}")
                elif response.status_code == 403:
                    # NOTE(review): 403 can also indicate API rate limiting -- confirm.
                    raise ValidationError(f"Access denied: {repo_info.full_name}")
                elif response.status_code != 200:
                    raise ValidationError(f"HTTP error {response.status_code}: {repo_info.full_name}")
                data = response.json()
                if data.get("private", False):
                    raise ValidationError(f"Private repository not supported: {repo_info.full_name}")
                repo_info.branch = data.get("default_branch", "main")
            except httpx.TimeoutException:
                raise ValidationError(f"Timeout checking repository: {repo_info.full_name}")
            except httpx.RequestError as e:
                raise ValidationError(f"Network error: {str(e)}")

    def validate_url_sync(self, url: str) -> GitHubRepoInfo:
        """Synchronous parse-only validation (no network access)."""
        return self.parse_url(url)
def setup_logging(
    level: str = "INFO",
    json_format: bool = False,
    log_file: str | None = None,
) -> None:
    """Configure structured logging for the application.

    Args:
        level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        json_format: If True, output logs as JSON (for production)
        log_file: Optional file path for logging output
    """
    # Configure standard library logging; structlog renders the final line,
    # so the stdlib format is just the message.
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=getattr(logging, level.upper()),
    )

    # Add file handler if specified
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(getattr(logging, level.upper()))
        logging.getLogger().addHandler(file_handler)

    # Shared processors for all outputs
    shared_processors: list[Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_log_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]

    if json_format:
        # Production: JSON output
        processors: list[Processor] = [
            *shared_processors,
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ]
    else:
        # Development: Colored console output
        processors = [
            *shared_processors,
            structlog.dev.ConsoleRenderer(colors=True),
        ]

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )


def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
    """Get a structured logger instance.

    Args:
        name: Logger name (usually __name__ of the calling module)

    Returns:
        Configured structlog logger
    """
    return structlog.get_logger(name)


class LogContext:
    """Context manager for adding temporary context to logs."""

    def __init__(self, **kwargs: Any) -> None:
        """Initialize with context variables."""
        self.context = kwargs
        # Mapping of var name -> contextvars Token, captured on __enter__.
        self._tokens: Any = None

    def __enter__(self) -> "LogContext":
        """Bind context variables, keeping the reset tokens."""
        self._tokens = structlog.contextvars.bind_contextvars(**self.context)
        return self

    def __exit__(self, *args: Any) -> None:
        """Restore the previous values of the bound context variables.

        BUGFIX: the previous implementation unbound the keys outright, which
        also wiped values that were bound *before* entering the block.
        bind_contextvars returns reset tokens; reset_contextvars restores
        the prior state exactly.
        """
        structlog.contextvars.reset_contextvars(**self._tokens)


def log_operation(
    operation: str,
    **kwargs: Any,
) -> LogContext:
    """Create a logging context for an operation.

    Usage:
        with log_operation("indexing", repo_id="123"):
            # All logs within this block will include repo_id
            logger.info("Starting indexing")
    """
    return LogContext(operation=operation, **kwargs)
def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Wires up CORS, the /health endpoint, the versioned REST API, and --
    best-effort, each wrapped in try/except -- the MCP server (/mcp) and
    the Gradio UI (/).
    """
    app = FastAPI(
        title=settings.app_name,
        version=settings.app_version,
        description="RAG-based Q&A system for code repositories with verifiable citations",
        docs_url="/docs",
        redoc_url="/redoc",
        lifespan=lifespan,
    )

    # CORS middleware
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # disallowed by the CORS spec, so credentialed cross-origin requests will
    # not work as-is -- confirm whether credentials are actually needed.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Health check endpoint
    @app.get("/health")
    async def health_check() -> dict:
        """Health check endpoint."""
        return {
            "status": "healthy",
            "app": settings.app_name,
            "version": settings.app_version,
        }

    # Register API routes (imported here, inside the factory, rather than at
    # module top level).
    from coderag.api.routes import router as api_router

    app.include_router(api_router, prefix="/api/v1")

    # Mount MCP server -- optional: the app still starts without it.
    try:
        from coderag.mcp.server import create_mcp_server

        mcp_server = create_mcp_server()
        mcp_app = mcp_server.streamable_http_app()
        app.mount("/mcp", mcp_app)
        logger.info("MCP server mounted at /mcp")
    except ImportError as e:
        logger.warning("MCP server not available", error=str(e))
    except Exception as e:
        logger.error("Failed to mount MCP server", error=str(e))

    # Mount Gradio UI -- optional; mounted at "/" last, after the API routes
    # and /mcp have been registered on `app`.
    try:
        from coderag.ui.app import create_gradio_app
        import gradio as gr

        gradio_app = create_gradio_app()
        app = gr.mount_gradio_app(app, gradio_app, path="/")
        logger.info("Gradio UI mounted at /")
    except ImportError as e:
        logger.warning("Gradio UI not available", error=str(e))
    except Exception as e:
        logger.error("Failed to mount Gradio UI", error=str(e))

    return app


def main() -> None:
    """Run the application."""
    app = create_app()

    logger.info(
        "Starting server",
        host=settings.server.host,
        port=settings.server.port,
    )

    # NOTE(review): uvicorn ignores reload/workers when given an app
    # *instance* (they require an import string) -- confirm these settings
    # are expected to take effect here.
    uvicorn.run(
        app,
        host=settings.server.host,
        port=settings.server.port,
        reload=settings.server.reload,
        workers=settings.server.workers,
        log_level=settings.server.log_level,
    )


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        logger.info("Application interrupted by user")
    except Exception as e:
        # Last-resort diagnostics: log, print a visible banner, and hold the
        # terminal open so double-click launches can read the traceback.
        # NOTE(review): input() blocks forever in headless/CI runs -- confirm
        # this entry point is only used interactively.
        logger.error("Application crashed", error=str(e), exc_info=True)
        import traceback
        print("\n" + "="*80)
        print("FATAL ERROR:")
        print("="*80)
        traceback.print_exc()
        print("="*80)
        input("Press Enter to close...")  # Keep terminal open
"""CLI entry point for running MCP server in stdio mode."""

import sys
import os

# Suppress all stdout output except MCP protocol
# (stdio transport uses stdout for JSON-RPC framing; any stray output
# would corrupt the protocol stream).
os.environ["PYTHONUNBUFFERED"] = "1"

# Redirect any stray prints to stderr
# NOTE(review): _original_stdout is saved but never used, and `io` appears
# to be imported only for that purpose -- confirm whether an actual stdout
# redirection was intended here.
import io
_original_stdout = sys.stdout


def main():
    """Run the MCP server in stdio mode for Claude Desktop."""
    # Suppress logging to stdout - redirect to stderr
    import logging
    logging.basicConfig(
        level=logging.WARNING,
        stream=sys.stderr,
        format="%(message)s"
    )

    # Suppress structlog output: only CRITICAL and above pass the filter.
    import structlog
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(logging.CRITICAL),
    )

    # Imported lazily so logging is configured before any module-level
    # loggers in coderag are created.
    from coderag.mcp.server import create_mcp_server

    mcp = create_mcp_server()
    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()
from JSON file.""" + if self.repos_file.exists(): + try: + data = json.loads(self.repos_file.read_text()) + return {r["id"]: Repository.from_dict(r) for r in data} + except Exception as e: + logger.error("Failed to load repositories", error=str(e)) + return {} + + def _save_repositories(self) -> None: + """Save repositories to JSON file.""" + self.repos_file.parent.mkdir(parents=True, exist_ok=True) + data = [r.to_dict() for r in self.repositories.values()] + self.repos_file.write_text(json.dumps(data, indent=2)) + + def _reload_repositories(self) -> None: + """Reload repositories from disk (for consistency with UIHandlers).""" + self.repositories = self._load_repositories() + + def _find_repository(self, repo_id: str) -> Optional[Repository]: + """Find repository by full or partial ID.""" + self._reload_repositories() + for rid, repo in self.repositories.items(): + if rid == repo_id or rid.startswith(repo_id): + return repo + return None + + def _process_batch(self, chunks: list[Chunk]) -> int: + """Process a batch: embed + store + release memory.""" + if not chunks: + return 0 + + embedded = self.embedder.embed_chunks(chunks, show_progress=False) + self.vectorstore.add_chunks(embedded) + + # Release memory + del embedded + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return len(chunks) + + def _get_current_commit(self, repo_path: Path) -> str: + """Get the SHA of the current commit.""" + from git import Repo + + git_repo = Repo(repo_path) + return git_repo.head.commit.hexsha + + def _get_changed_files( + self, + repo_path: Path, + last_commit: str, + current_commit: str, + ) -> tuple[set[str], set[str], set[str]]: + """Get files that were added, modified, or deleted between commits.""" + from git import Repo + + git_repo = Repo(repo_path) + diff = git_repo.commit(last_commit).diff(current_commit) + + added: set[str] = set() + modified: set[str] = set() + deleted: set[str] = set() + + for d in diff: + if d.new_file: + added.add(d.b_path) + elif 
d.deleted_file: + deleted.add(d.a_path) + elif d.renamed: + deleted.add(d.a_path) + added.add(d.b_path) + else: + modified.add(d.b_path or d.a_path) + + return added, modified, deleted + + async def index_repository( + self, + url: str, + branch: str = "", + include_patterns: Optional[list[str]] = None, + exclude_patterns: Optional[list[str]] = None, + ) -> dict[str, Any]: + """Index a GitHub repository (non-streaming version).""" + try: + logger.info("MCP: Starting indexing", url=url, branch=branch) + repo_info = self.validator.parse_url(url) + branch = branch.strip() if branch else repo_info.branch or "main" + + # Create repository record + repo = Repository( + url=repo_info.url, + branch=branch, + status=RepositoryStatus.CLONING, + ) + self.repositories[repo.id] = repo + self._save_repositories() + + # Clone repository + logger.info("MCP: Cloning repository", url=url, branch=branch) + repo_path = self.loader.clone_repository(repo_info, branch) + repo.clone_path = repo_path + repo.status = RepositoryStatus.INDEXING + self._save_repositories() + + # Setup filter with custom patterns + file_filter = FileFilter( + include_patterns=include_patterns, + exclude_patterns=exclude_patterns, + ) + + # Process files + logger.info("MCP: Filtering files", repo_path=str(repo_path)) + files = list(file_filter.filter_files(repo_path)) + file_count = len(files) + logger.info("MCP: Files to process", count=file_count) + + # Delete existing chunks for this repo (re-indexing) + self.vectorstore.delete_repo_chunks(repo.id) + + # Index all files + total_chunks = 0 + batch: list[Chunk] = [] + batch_size = self.settings.ingestion.batch_size + + for file_path in files: + try: + doc = Document.from_file(file_path, repo_path, repo.id) + for chunk in self.chunker.chunk_document(doc): + chunk.repo_id = repo.id + batch.append(chunk) + + if len(batch) >= batch_size: + total_chunks += self._process_batch(batch) + batch = [] + except Exception as e: + logger.warning("Failed to process file", 
path=str(file_path), error=str(e)) + + # Process final batch + if batch: + total_chunks += self._process_batch(batch) + + # Save commit for incremental updates + try: + repo.last_commit = self._get_current_commit(repo_path) + except Exception: + repo.last_commit = None + + # Update repository status + repo.chunk_count = total_chunks + repo.indexed_at = datetime.now() + repo.status = RepositoryStatus.READY + self._save_repositories() + + logger.info("MCP: Indexing complete", repo_id=repo.id, chunks=total_chunks) + + return { + "success": True, + "repo_id": repo.id, + "name": repo.full_name, + "branch": repo.branch, + "files_processed": file_count, + "chunks_indexed": total_chunks, + } + + except ValidationError as e: + logger.error("MCP: Validation error", error=str(e)) + return {"success": False, "error": f"Validation error: {str(e)}"} + except Exception as e: + logger.error("MCP: Indexing failed", error=str(e), exc_info=True) + if "repo" in locals(): + repo.status = RepositoryStatus.ERROR + repo.error_message = str(e) + self._save_repositories() + return {"success": False, "error": str(e)} + + async def query_code( + self, + repo_id: str, + question: str, + top_k: int = 5, + ) -> dict[str, Any]: + """Ask a question about a repository.""" + repo = self._find_repository(repo_id) + + if not repo: + return { + "answer": "", + "citations": [], + "evidence": [], + "grounded": False, + "error": f"Repository not found: {repo_id}", + } + + if repo.status != RepositoryStatus.READY: + return { + "answer": "", + "citations": [], + "evidence": [], + "grounded": False, + "error": f"Repository not ready: status is {repo.status.value}", + } + + try: + # Lazy load generator + if self.generator is None: + self.generator = ResponseGenerator() + + query = Query( + question=question.strip(), + repo_id=repo.id, + top_k=int(top_k), + ) + + response = self.generator.generate(query) + + return { + "answer": response.answer, + "citations": response.citations, + "evidence": [ + { + "file": 
chunk.file_path, + "start_line": chunk.start_line, + "end_line": chunk.end_line, + "content": chunk.content[:500], # Truncate for MCP response + "relevance": round(chunk.relevance_score or 0, 3), + } + for chunk in response.retrieved_chunks + ], + "grounded": response.grounded, + } + + except Exception as e: + logger.error("MCP: Query failed", error=str(e)) + return { + "answer": "", + "citations": [], + "evidence": [], + "grounded": False, + "error": str(e), + } + + async def list_repositories(self) -> dict[str, Any]: + """List all indexed repositories.""" + self._reload_repositories() + + repos = [] + for repo in self.repositories.values(): + repos.append({ + "id": repo.id, + "name": repo.full_name, + "branch": repo.branch, + "status": repo.status.value, + "chunk_count": repo.chunk_count, + "indexed_at": repo.indexed_at.isoformat() if repo.indexed_at else None, + }) + + return { + "repositories": repos, + "count": len(repos), + } + + async def get_repository_info(self, repo_id: str) -> dict[str, Any]: + """Get detailed repository information.""" + repo = self._find_repository(repo_id) + + if not repo: + return {"error": f"Repository not found: {repo_id}"} + + # Get indexed files from vectorstore + indexed_files: list[str] = [] + try: + files = self.vectorstore.get_indexed_files(repo.id) + indexed_files = list(files) if files else [] + except Exception: + pass + + return { + "id": repo.id, + "name": repo.name, + "full_name": repo.full_name, + "url": repo.url, + "branch": repo.branch, + "status": repo.status.value, + "chunk_count": repo.chunk_count, + "indexed_at": repo.indexed_at.isoformat() if repo.indexed_at else None, + "last_commit": repo.last_commit, + "indexed_files": indexed_files, + "error_message": repo.error_message, + } + + async def delete_repository(self, repo_id: str) -> dict[str, Any]: + """Delete an indexed repository.""" + repo = self._find_repository(repo_id) + + if not repo: + return {"success": False, "error": f"Repository not found: 
{repo_id}"} + + try: + # Get chunk count before deletion + chunk_count = self.vectorstore.get_repo_chunk_count(repo.id) + + # Delete from vector store + self.vectorstore.delete_repo_chunks(repo.id) + + # Delete cached repo + try: + self.loader.delete_cache( + type("RepoInfo", (), {"owner": repo.owner, "name": repo.name})() + ) + except Exception: + pass + + # Remove from records + del self.repositories[repo.id] + self._save_repositories() + + logger.info("MCP: Repository deleted", repo_id=repo.id) + + return { + "success": True, + "repo_id": repo.id, + "name": repo.full_name, + "chunks_deleted": chunk_count, + } + + except Exception as e: + logger.error("MCP: Delete failed", error=str(e)) + return {"success": False, "error": str(e)} + + async def update_repository(self, repo_id: str) -> dict[str, Any]: + """Incremental update of a repository.""" + repo = self._find_repository(repo_id) + + if not repo: + return {"success": False, "error": f"Repository not found: {repo_id}"} + + if not repo.last_commit: + return { + "success": False, + "error": "No previous indexing found. Please re-index the full repository.", + } + + if not repo.clone_path or not Path(repo.clone_path).exists(): + return {"success": False, "error": "Repository cache not found. 
Please re-index."} + + try: + repo_path = Path(repo.clone_path) + + # Update local repository + logger.info("MCP: Updating local repository", repo_id=repo.id) + self.loader._update_repository(repo_path, repo.branch, None) + + current_commit = self._get_current_commit(repo_path) + + if current_commit == repo.last_commit: + return { + "success": True, + "message": "Repository is already up to date", + "files_changed": 0, + "chunks_added": 0, + "chunks_deleted": 0, + } + + added, modified, deleted = self._get_changed_files( + repo_path, repo.last_commit, current_commit + ) + + logger.info( + "MCP: Changes detected", + added=len(added), + modified=len(modified), + deleted=len(deleted), + ) + + # Delete chunks for deleted/modified files + chunks_deleted = 0 + for file_path in deleted | modified: + count = self.vectorstore.delete_file_chunks(repo.id, file_path) + chunks_deleted += count if count else 0 + + # Index new/modified files + files_to_index = [] + file_filter = FileFilter() + for file_path in added | modified: + full_path = repo_path / file_path + if full_path.exists() and file_filter.should_include(full_path, repo_path): + files_to_index.append(full_path) + + new_chunks = 0 + if files_to_index: + batch_size = self.settings.ingestion.batch_size + batch: list[Chunk] = [] + + for file_path in files_to_index: + try: + doc = Document.from_file(file_path, repo_path, repo.id) + for chunk in self.chunker.chunk_document(doc): + chunk.repo_id = repo.id + batch.append(chunk) + + if len(batch) >= batch_size: + new_chunks += self._process_batch(batch) + batch = [] + except Exception as e: + logger.warning("Failed to process file", path=str(file_path), error=str(e)) + + if batch: + new_chunks += self._process_batch(batch) + + # Update metadata + repo.last_commit = current_commit + repo.indexed_at = datetime.now() + repo.chunk_count = self.vectorstore.get_repo_chunk_count(repo.id) + self._save_repositories() + + return { + "success": True, + "files_changed": len(added | 
modified | deleted), + "files_added": len(added), + "files_modified": len(modified), + "files_deleted": len(deleted), + "chunks_added": new_chunks, + "chunks_deleted": chunks_deleted, + "total_chunks": repo.chunk_count, + } + + except Exception as e: + logger.error("MCP: Incremental update failed", error=str(e), exc_info=True) + return {"success": False, "error": str(e)} + + async def search_code( + self, + repo_id: str, + query: str, + top_k: int = 10, + file_filter: Optional[str] = None, + chunk_type: Optional[str] = None, + ) -> dict[str, Any]: + """Semantic code search without LLM generation.""" + repo = self._find_repository(repo_id) + + if not repo: + return {"results": [], "error": f"Repository not found: {repo_id}"} + + if repo.status != RepositoryStatus.READY: + return {"results": [], "error": f"Repository not ready: status is {repo.status.value}"} + + try: + # Generate query embedding + query_embedding = self.embedder.generate_embedding(query, is_query=True) + + # Search vectorstore (query returns list of (Chunk, score) tuples) + results = self.vectorstore.query( + query_embedding=query_embedding, + repo_id=repo.id, + top_k=top_k, + ) + + # Filter by file pattern if provided + if file_filter: + import fnmatch + + results = [(chunk, score) for chunk, score in results if fnmatch.fnmatch(chunk.file_path, file_filter)] + + # Filter by chunk type if provided + if chunk_type: + results = [(chunk, score) for chunk, score in results if chunk.chunk_type == chunk_type] + + return { + "results": [ + { + "file_path": chunk.file_path, + "start_line": chunk.start_line, + "end_line": chunk.end_line, + "chunk_type": chunk.chunk_type, + "content": chunk.content, + "relevance_score": round(score, 3), + } + for chunk, score in results[:top_k] + ], + "count": len(results), + } + + except Exception as e: + logger.error("MCP: Search failed", error=str(e)) + return {"results": [], "error": str(e)} + + +# Singleton pattern +_mcp_handlers: Optional[MCPHandlers] = None + + +def 
@mcp.prompt()
async def analyze_repository(repo_url: str) -> list[PromptMessage]:
    """Guide for comprehensive repository analysis.

    Args:
        repo_url: GitHub repository URL to analyze

    Returns:
        List of prompt messages guiding the analysis workflow
    """
    # Build the instruction text first, then wrap it in a single user message.
    text = f"""Please analyze the repository at {repo_url}. Follow these steps:

1. First, use the `index_repository` tool to index the repository:
   - URL: {repo_url}

2. Once indexed, use `get_repository_info` to understand the repository structure:
   - Note the number of files and chunks indexed
   - Review the list of indexed files

3. Use `query_code` to answer these questions:
   - What is the main purpose of this codebase?
   - What are the key components or modules?
   - What design patterns are used?
   - What external dependencies does it have?

4. Use `search_code` to find:
   - Entry points (main functions, CLI handlers)
   - Configuration handling
   - Core business logic

5. Provide a comprehensive summary including:
   - Purpose and functionality
   - Architecture overview
   - Key components
   - Notable patterns or practices
   - Potential areas for improvement
"""
    return [PromptMessage(role="user", content=TextContent(type="text", text=text))]


@mcp.prompt()
async def find_implementation(repo_id: str, feature: str) -> list[PromptMessage]:
    """Guide for finding feature implementations.

    Args:
        repo_id: Repository ID to search in
        feature: Feature or functionality to find

    Returns:
        List of prompt messages guiding the search workflow
    """
    text = f"""Please find the implementation of "{feature}" in repository {repo_id}. Follow these steps:

1. Use `search_code` to find relevant code:
   - Query: "{feature}"
   - Try different search terms if initial results aren't helpful

2. For each relevant result, use `query_code` to understand:
   - How is this feature implemented?
   - What are the key functions/classes involved?
   - What is the data flow?

3. Trace the implementation:
   - Find the entry point
   - Follow the call chain
   - Identify helper functions and utilities

4. Provide a detailed explanation:
   - Location of the implementation (files and line numbers)
   - Key components and their roles
   - How data flows through the system
   - Any notable patterns or design decisions
"""
    return [PromptMessage(role="user", content=TextContent(type="text", text=text))]
@mcp.prompt()
async def code_review(repo_id: str, focus_area: str = "") -> list[PromptMessage]:
    """Guide for performing code reviews.

    Args:
        repo_id: Repository ID to review
        focus_area: Optional specific area to focus on (e.g., "security", "performance")

    Returns:
        List of prompt messages guiding the review workflow
    """
    focus_text = f' with focus on "{focus_area}"' if focus_area else ""

    text = f"""Please perform a code review of repository {repo_id}{focus_text}. Follow these steps:

1. Use `get_repository_info` to understand the repository structure

2. Use `search_code` to find key areas to review:
   - Entry points and main functions
   - Error handling patterns
   - Data validation
   - Security-sensitive code (if applicable)

3. For each area, use `query_code` to analyze:
   - Code quality and readability
   - Error handling completeness
   - Security considerations
   - Performance implications
   - Test coverage (if tests are indexed)

4. Check for common issues:
   - Hardcoded credentials or secrets
   - SQL injection vulnerabilities
   - Input validation gaps
   - Resource leaks
   - Race conditions

5. Provide a structured review:
   - Summary of findings
   - Critical issues (if any)
   - Recommendations for improvement
   - Positive observations
   - Priority of fixes
"""
    return [PromptMessage(role="user", content=TextContent(type="text", text=text))]


@mcp.resource("repository://{repo_id}")
async def get_repository_resource(repo_id: str) -> str:
    """Get repository metadata as JSON.

    Args:
        repo_id: Repository ID (full or first 8 characters)

    Returns:
        Repository metadata as JSON string
    """
    info = await get_mcp_handlers().get_repository_info(repo_id=repo_id)
    return json.dumps(info, indent=2)


@mcp.resource("repositories://list")
async def get_repositories_list() -> str:
    """Get all repositories as JSON.

    Returns:
        List of all repositories as JSON string
    """
    listing = await get_mcp_handlers().list_repositories()
    return json.dumps(listing, indent=2)
# Shared FastMCP server instance; tools/resources/prompts register onto it.
mcp = FastMCP(
    name="CodeRAG",
    instructions="""CodeRAG is a RAG-based Q&A system for code repositories.

Available capabilities:
- Index GitHub repositories for code analysis
- Ask questions about indexed code with verifiable citations
- Search code semantically
- Manage indexed repositories

Use the tools to:
1. index_repository: Index a new GitHub repository
2. query_code: Ask questions about indexed code
3. search_code: Search code without LLM generation
4. list_repositories: See all indexed repositories
5. get_repository_info: Get details about a specific repository
6. update_repository: Incrementally update a repository
7. delete_repository: Remove an indexed repository

Use the prompts for guided workflows:
- analyze_repository: Comprehensive repository analysis
- find_implementation: Find feature implementations
- code_review: Perform code reviews
""",
)


def create_mcp_server() -> FastMCP:
    """Create and configure the MCP server with all tools, resources, and prompts."""
    # Importing these modules executes their @mcp.tool/@mcp.resource/@mcp.prompt
    # decorators, which registers everything on the shared `mcp` instance.
    # Import order is preserved so registration order stays stable.
    from coderag.mcp import tools  # noqa: F401
    from coderag.mcp import resources  # noqa: F401
    from coderag.mcp import prompts  # noqa: F401

    return mcp


@mcp.tool()
async def index_repository(
    url: str,
    branch: str = "",
    include_patterns: Optional[list[str]] = None,
    exclude_patterns: Optional[list[str]] = None,
) -> dict:
    """Index a GitHub repository for code Q&A.

    Args:
        url: GitHub repository URL (e.g., https://github.com/owner/repo)
        branch: Branch to index (defaults to main/master)
        include_patterns: File patterns to include (e.g., ["*.py", "*.ts"])
        exclude_patterns: File patterns to exclude (e.g., ["*_test.py"])

    Returns:
        dict with success status, repo_id, files_processed, and chunks_indexed
    """
    return await get_mcp_handlers().index_repository(
        url=url,
        branch=branch,
        include_patterns=include_patterns,
        exclude_patterns=exclude_patterns,
    )
@mcp.tool()
async def query_code(
    repo_id: str,
    question: str,
    top_k: int = 5,
) -> dict:
    """Ask questions about indexed code with citations.

    Args:
        repo_id: Repository ID (full or first 8 characters)
        question: Question about the code
        top_k: Number of code chunks to retrieve for context (default: 5)

    Returns:
        dict with answer, citations, evidence, and grounded flag
    """
    return await get_mcp_handlers().query_code(
        repo_id=repo_id,
        question=question,
        top_k=top_k,
    )


@mcp.tool()
async def list_repositories() -> dict:
    """List all indexed repositories.

    Returns:
        dict with repositories array and count
    """
    return await get_mcp_handlers().list_repositories()


@mcp.tool()
async def get_repository_info(repo_id: str) -> dict:
    """Get detailed repository information.

    Args:
        repo_id: Repository ID (full or first 8 characters)

    Returns:
        dict with repository metadata including name, url, branch, chunk_count, status, and indexed_files
    """
    return await get_mcp_handlers().get_repository_info(repo_id=repo_id)


@mcp.tool()
async def delete_repository(repo_id: str) -> dict:
    """Remove an indexed repository.

    Args:
        repo_id: Repository ID (full or first 8 characters)

    Returns:
        dict with success status and chunks_deleted
    """
    return await get_mcp_handlers().delete_repository(repo_id=repo_id)


@mcp.tool()
async def update_repository(repo_id: str) -> dict:
    """Incremental update of a repository (only changed files).

    Args:
        repo_id: Repository ID (full or first 8 characters)

    Returns:
        dict with success status, files_changed, chunks_added, chunks_deleted
    """
    return await get_mcp_handlers().update_repository(repo_id=repo_id)
@mcp.tool()
async def search_code(
    repo_id: str,
    query: str,
    top_k: int = 10,
    file_filter: Optional[str] = None,
    chunk_type: Optional[str] = None,
) -> dict:
    """Semantic code search without LLM generation.

    Args:
        repo_id: Repository ID (full or first 8 characters)
        query: Search query
        top_k: Maximum number of results (default: 10)
        file_filter: File pattern filter (e.g., "*.py")
        chunk_type: Filter by chunk type (e.g., "function", "class")

    Returns:
        dict with results array containing file_path, start_line, end_line, content, and relevance_score
    """
    return await get_mcp_handlers().search_code(
        repo_id=repo_id,
        query=query,
        top_k=top_k,
        file_filter=file_filter,
        chunk_type=chunk_type,
    )
"""Chunk entity model for semantic code units."""

from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
from uuid import uuid4


class ChunkType(str, Enum):
    """Kind of source construct a chunk was extracted from."""

    FUNCTION = "function"
    CLASS = "class"
    METHOD = "method"
    MODULE = "module"
    TEXT = "text"
    DOCSTRING = "docstring"
    COMMENT = "comment"


@dataclass
class ChunkMetadata:
    """Location and structural metadata for a code chunk."""

    file_path: str
    start_line: int
    end_line: int
    chunk_type: ChunkType
    language: Optional[str] = None
    name: Optional[str] = None
    signature: Optional[str] = None
    docstring: Optional[str] = None
    parent_name: Optional[str] = None

    @property
    def line_range(self) -> str:
        """Inclusive line span rendered as "start-end"."""
        return f"{self.start_line}-{self.end_line}"

    @property
    def citation(self) -> str:
        """Citation marker used in answers: [path:start-end]."""
        return f"[{self.file_path}:{self.line_range}]"


@dataclass
class Chunk:
    """A semantic unit of code or documentation."""

    content: str
    metadata: ChunkMetadata
    repo_id: str
    id: str = field(default_factory=lambda: str(uuid4()))
    embedding: Optional[list[float]] = None

    # -- Convenience accessors that simply forward to the metadata record --

    @property
    def file_path(self) -> str:
        """Repository-relative file path of this chunk."""
        return self.metadata.file_path

    @property
    def start_line(self) -> int:
        """First line (inclusive) of the chunk."""
        return self.metadata.start_line

    @property
    def end_line(self) -> int:
        """Last line (inclusive) of the chunk."""
        return self.metadata.end_line

    @property
    def chunk_type(self) -> ChunkType:
        """Structural kind of the chunk."""
        return self.metadata.chunk_type

    @property
    def name(self) -> Optional[str]:
        """Name of the function/class the chunk covers, when known."""
        return self.metadata.name

    @property
    def citation(self) -> str:
        """Citation marker for this chunk."""
        return self.metadata.citation

    def to_dict(self) -> dict:
        """Flatten chunk + metadata into one dict for storage.

        The embedding is intentionally omitted; it is stored separately.
        """
        md = self.metadata
        return {
            "id": self.id,
            "content": self.content,
            "repo_id": self.repo_id,
            "file_path": md.file_path,
            "start_line": md.start_line,
            "end_line": md.end_line,
            "chunk_type": md.chunk_type.value,
            "language": md.language,
            "name": md.name,
            "signature": md.signature,
            "docstring": md.docstring,
            "parent_name": md.parent_name,
        }

    @classmethod
    def from_dict(cls, data: dict, embedding: Optional[list[float]] = None) -> "Chunk":
        """Inverse of to_dict; optional metadata fields default to None when absent."""
        return cls(
            id=data["id"],
            content=data["content"],
            repo_id=data["repo_id"],
            embedding=embedding,
            metadata=ChunkMetadata(
                file_path=data["file_path"],
                start_line=data["start_line"],
                end_line=data["end_line"],
                chunk_type=ChunkType(data["chunk_type"]),
                language=data.get("language"),
                name=data.get("name"),
                signature=data.get("signature"),
                docstring=data.get("docstring"),
                parent_name=data.get("parent_name"),
            ),
        )
@classmethod + def from_dict(cls, data: dict, embedding: Optional[list[float]] = None) -> "Chunk": + """Create from dictionary.""" + metadata = ChunkMetadata( + file_path=data["file_path"], + start_line=data["start_line"], + end_line=data["end_line"], + chunk_type=ChunkType(data["chunk_type"]), + language=data.get("language"), + name=data.get("name"), + signature=data.get("signature"), + docstring=data.get("docstring"), + parent_name=data.get("parent_name"), + ) + return cls( + id=data["id"], + content=data["content"], + metadata=metadata, + repo_id=data["repo_id"], + embedding=embedding, + ) diff --git a/src/coderag/models/document.py b/src/coderag/models/document.py new file mode 100644 index 0000000000000000000000000000000000000000..3bb7707aa20734b926d5e157ed79031e90842258 --- /dev/null +++ b/src/coderag/models/document.py @@ -0,0 +1,96 @@ +"""Document entity model for representing source files.""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + + +@dataclass +class DocumentMetadata: + """Metadata for a source document.""" + + file_path: str + language: Optional[str] = None + size_bytes: int = 0 + line_count: int = 0 + encoding: str = "utf-8" + + @property + def extension(self) -> str: + """Get file extension.""" + return Path(self.file_path).suffix.lstrip(".") + + +@dataclass +class Document: + """Represents a source code file loaded for processing.""" + + content: str + metadata: DocumentMetadata + repo_id: str = "" + + @property + def file_path(self) -> str: + """Convenience accessor for file path.""" + return self.metadata.file_path + + @property + def language(self) -> Optional[str]: + """Convenience accessor for language.""" + return self.metadata.language + + @classmethod + def from_file(cls, file_path: Path, repo_root: Path, repo_id: str = "") -> "Document": + """Create Document from a file path.""" + content = file_path.read_text(encoding="utf-8") + relative_path = str(file_path.relative_to(repo_root)) + 
# Lowercased file extension (with leading dot) -> canonical language name.
_EXTENSION_LANGUAGES = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".jsx": "javascript",
    ".tsx": "typescript",
    ".java": "java",
    ".go": "go",
    ".rs": "rust",
    ".rb": "ruby",
    ".php": "php",
    ".c": "c",
    ".cpp": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".cs": "csharp",
    ".swift": "swift",
    ".kt": "kotlin",
    ".scala": "scala",
    ".md": "markdown",
    ".rst": "restructuredtext",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".json": "json",
    ".toml": "toml",
    ".xml": "xml",
    ".html": "html",
    ".css": "css",
    ".sql": "sql",
    ".sh": "bash",
    ".bash": "bash",
    ".zsh": "zsh",
}


def _detect_language(extension: str) -> Optional[str]:
    """Map a file extension (e.g. ``".py"``) to a language name.

    The lookup is case-insensitive; unknown extensions yield ``None``.
    """
    return _EXTENSION_LANGUAGES.get(extension.lower())
status.""" + + PENDING = "pending" + CLONING = "cloning" + INDEXING = "indexing" + READY = "ready" + ERROR = "error" + + +@dataclass +class Repository: + """Represents an indexed GitHub repository.""" + + url: str + branch: str = "main" + id: str = field(default_factory=lambda: str(uuid4())) + clone_path: Optional[Path] = None + indexed_at: Optional[datetime] = None + chunk_count: int = 0 + status: RepositoryStatus = RepositoryStatus.PENDING + error_message: Optional[str] = None + last_commit: Optional[str] = None # SHA of last indexed commit (for incremental updates) + + @property + def name(self) -> str: + """Extract repository name from URL.""" + return self.url.rstrip("/").split("/")[-1].replace(".git", "") + + @property + def owner(self) -> str: + """Extract repository owner from URL.""" + parts = self.url.rstrip("/").split("/") + return parts[-2] if len(parts) >= 2 else "" + + @property + def full_name(self) -> str: + """Get owner/repo format.""" + return f"{self.owner}/{self.name}" + + def to_dict(self) -> dict: + """Convert to dictionary for serialization.""" + return { + "id": self.id, + "url": self.url, + "branch": self.branch, + "clone_path": str(self.clone_path) if self.clone_path else None, + "indexed_at": self.indexed_at.isoformat() if self.indexed_at else None, + "chunk_count": self.chunk_count, + "status": self.status.value, + "error_message": self.error_message, + "last_commit": self.last_commit, + } + + @classmethod + def from_dict(cls, data: dict) -> "Repository": + """Create from dictionary.""" + return cls( + id=data["id"], + url=data["url"], + branch=data.get("branch", "main"), + clone_path=Path(data["clone_path"]) if data.get("clone_path") else None, + indexed_at=datetime.fromisoformat(data["indexed_at"]) + if data.get("indexed_at") + else None, + chunk_count=data.get("chunk_count", 0), + status=RepositoryStatus(data.get("status", "pending")), + error_message=data.get("error_message"), + last_commit=data.get("last_commit"), + ) diff --git 
"""Response entity models for Q&A results."""

from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
from uuid import uuid4


@dataclass
class Citation:
    """A pointer to a source-code location, rendered as ``[file:start-end]``."""

    file_path: str
    start_line: int
    end_line: int

    def __str__(self) -> str:
        """Render in the canonical bracketed citation form."""
        return f"[{self.file_path}:{self.start_line}-{self.end_line}]"

    @classmethod
    def parse(cls, citation_str: str) -> Optional["Citation"]:
        """Parse ``[file:start-end]`` or ``[file:line]``; None when malformed."""
        text = citation_str.strip("[]")
        if ":" not in text:
            return None
        path, _, span = text.rpartition(":")
        try:
            if "-" in span:
                lo, hi = span.split("-")
                return cls(file_path=path, start_line=int(lo), end_line=int(hi))
            line_no = int(span)
        except (ValueError, IndexError):
            return None
        return cls(file_path=path, start_line=line_no, end_line=line_no)


@dataclass
class RetrievedChunk:
    """A chunk returned by retrieval, carrying its relevance score."""

    chunk_id: str
    content: str
    file_path: str
    start_line: int
    end_line: int
    relevance_score: float
    chunk_type: str
    name: Optional[str] = None

    @property
    def citation(self) -> str:
        """Canonical ``[file:start-end]`` citation for this chunk."""
        return f"[{self.file_path}:{self.start_line}-{self.end_line}]"


@dataclass
class Query:
    """A user's question about an indexed repository."""

    question: str
    repo_id: str
    id: str = field(default_factory=lambda: str(uuid4()))
    timestamp: datetime = field(default_factory=datetime.now)
    top_k: int = 5


@dataclass
class Response:
    """The system's answer to a query, with citations and raw evidence."""

    answer: str
    citations: list[Citation]
    retrieved_chunks: list[RetrievedChunk]
    grounded: bool
    query_id: str = ""
    confidence_score: float = 0.0

    @property
    def has_evidence(self) -> bool:
        """True when at least one retrieved chunk backs the answer."""
        return bool(self.retrieved_chunks)

    @property
    def citation_count(self) -> int:
        """Number of citations attached to the answer."""
        return len(self.citations)

    def format_evidence(self) -> str:
        """Render retrieved chunks as a Markdown evidence section.

        Each chunk gets a heading with its citation and score, an optional
        bold type/name line, and a fenced code block truncated to 500 chars.
        """
        if not self.retrieved_chunks:
            return "No evidence retrieved."

        out = ["## Evidence\n"]
        for rank, hit in enumerate(self.retrieved_chunks, 1):
            out.append(f"### {rank}. {hit.citation} (Score: {hit.relevance_score:.3f})")
            if hit.name:
                out.append(f"**{hit.chunk_type}**: `{hit.name}`\n")
            body = hit.content[:500]
            if len(hit.content) > 500:
                body += "..."
            out.extend(["```", body, "```\n"])
        return "\n".join(out)
"""Retrieval module for semantic search."""

from typing import Optional

from coderag.config import get_settings
from coderag.indexing.embeddings import EmbeddingGenerator
from coderag.indexing.vectorstore import VectorStore
from coderag.logging import get_logger
from coderag.models.chunk import Chunk
from coderag.models.response import RetrievedChunk

logger = get_logger(__name__)


class Retriever:
    """Finds the chunks most relevant to a natural-language query."""

    def __init__(
        self,
        vectorstore: Optional[VectorStore] = None,
        embedder: Optional[EmbeddingGenerator] = None,
    ) -> None:
        cfg = get_settings()
        self.vectorstore = vectorstore or VectorStore()
        self.embedder = embedder or EmbeddingGenerator()
        self.default_top_k = cfg.retrieval.default_top_k
        self.max_top_k = cfg.retrieval.max_top_k
        self.similarity_threshold = cfg.retrieval.similarity_threshold

    def retrieve(
        self,
        query: str,
        repo_id: str,
        top_k: Optional[int] = None,
        similarity_threshold: Optional[float] = None,
    ) -> list[RetrievedChunk]:
        """Embed *query* and return scored chunks for *repo_id*.

        ``top_k`` defaults to the configured value and is capped at
        ``max_top_k``; ``similarity_threshold`` falls back to the configured
        threshold when not given.
        """
        effective_k = min(top_k or self.default_top_k, self.max_top_k)
        threshold = (
            self.similarity_threshold
            if similarity_threshold is None
            else similarity_threshold
        )

        logger.info("Retrieving chunks", query=query[:100], repo_id=repo_id, top_k=effective_k)

        # Embed the question, then run a similarity search in the vector store.
        embedding = self.embedder.generate_embedding(query, is_query=True)
        hits = self.vectorstore.query(
            query_embedding=embedding,
            repo_id=repo_id,
            top_k=effective_k,
            similarity_threshold=threshold,
        )

        results = [
            RetrievedChunk(
                chunk_id=hit.id,
                content=hit.content,
                file_path=hit.file_path,
                start_line=hit.start_line,
                end_line=hit.end_line,
                relevance_score=score,
                chunk_type=hit.chunk_type.value,
                name=hit.name,
            )
            for hit, score in hits
        ]

        logger.info("Chunks retrieved", count=len(results))
        return results

    def retrieve_with_context(
        self,
        query: str,
        repo_id: str,
        top_k: Optional[int] = None,
    ) -> tuple[list[RetrievedChunk], str]:
        """Retrieve chunks and also render them as a prompt-ready context string."""
        chunks = self.retrieve(query, repo_id, top_k)

        sections = []
        for idx, hit in enumerate(chunks, 1):
            name_part = f" | Name: {hit.name}" if hit.name else ""
            sections.append(
                f"[{idx}] {hit.citation}\n"
                f"Type: {hit.chunk_type}{name_part}\n"
                f"```\n{hit.content}\n```\n"
            )

        context = "\n".join(sections) if sections else "No relevant code found."
        return chunks, context
"""Gradio web interface for CodeRAG."""

from typing import Optional  # NOTE(review): unused in this module; kept as-is

import gradio as gr

from coderag.ui.handlers import UIHandlers


def create_gradio_app() -> gr.Blocks:
    """Create the Gradio application.

    Builds a three-tab UI (Index / Ask / Manage) and wires every control to a
    single shared ``UIHandlers`` instance, so repositories indexed in one tab
    are visible from the others for the life of the process.

    Returns:
        The assembled ``gr.Blocks``; the caller is expected to ``.launch()`` it.
    """
    # One shared handler instance backs every event callback below.
    handlers = UIHandlers()

    with gr.Blocks(title="CodeRAG - Code Q&A with Citations") as app:
        gr.Markdown("# CodeRAG - Code Q&A with Citations")
        gr.Markdown("Index GitHub repositories and ask questions about the code with verifiable citations.")

        with gr.Tabs():
            # Tab 1: Index Repository
            with gr.TabItem("Index Repository"):
                with gr.Row():
                    with gr.Column(scale=2):
                        repo_url = gr.Textbox(
                            label="GitHub Repository URL",
                            placeholder="https://github.com/owner/repo",
                            info="Enter a public GitHub repository URL",
                        )

                        # Optional overrides; empty strings mean "use defaults"
                        # (interpreted by UIHandlers.index_repository).
                        with gr.Accordion("Advanced Options", open=False):
                            branch = gr.Textbox(
                                label="Branch",
                                placeholder="main",
                                value="",
                                info="Leave empty for default branch",
                            )
                            include_patterns = gr.Textbox(
                                label="Include Patterns",
                                placeholder="*.py, *.js, *.md",
                                value="",
                                info="Comma-separated glob patterns (leave empty for defaults)",
                            )
                            exclude_patterns = gr.Textbox(
                                label="Exclude Patterns",
                                placeholder="**/tests/**, **/node_modules/**",
                                value="",
                                info="Comma-separated glob patterns (leave empty for defaults)",
                            )

                        index_btn = gr.Button("Index Repository", variant="primary")

                    with gr.Column(scale=1):
                        index_status = gr.Textbox(
                            label="Status",
                            interactive=False,
                            lines=3,
                        )
                        # NOTE(review): gr.Progress is normally passed as a
                        # default argument to an event handler rather than
                        # instantiated in the layout, and this instance is
                        # otherwise unused — confirm intent.
                        index_progress = gr.Progress()

                # index_repository is a generator; each yielded string streams
                # into index_status as a live progress update.
                index_btn.click(
                    fn=handlers.index_repository,
                    inputs=[repo_url, branch, include_patterns, exclude_patterns],
                    outputs=[index_status],
                )

            # Tab 2: Ask Questions
            with gr.TabItem("Ask Questions"):
                with gr.Row():
                    with gr.Column(scale=2):
                        # Populated on app.load and via the Refresh button;
                        # values are repository IDs, labels are display names.
                        repo_selector = gr.Dropdown(
                            label="Select Repository",
                            choices=[],
                            interactive=True,
                        )
                        refresh_repos_btn = gr.Button("Refresh", size="sm")

                        question = gr.Textbox(
                            label="Question",
                            placeholder="Where is the function X defined?",
                            lines=2,
                        )

                        with gr.Row():
                            top_k = gr.Slider(
                                minimum=1,
                                maximum=20,
                                value=5,
                                step=1,
                                label="Number of chunks to retrieve",
                            )

                        ask_btn = gr.Button("Ask", variant="primary")

                    with gr.Column(scale=1):
                        qa_status = gr.Textbox(
                            label="Status",
                            interactive=False,
                            lines=1,
                        )

                with gr.Row():
                    answer_output = gr.Markdown(label="Answer")

                with gr.Accordion("Evidence", open=True):
                    evidence_output = gr.Markdown(label="Retrieved Chunks")

                refresh_repos_btn.click(
                    fn=handlers.get_repositories,
                    outputs=[repo_selector],
                )

                ask_btn.click(
                    fn=handlers.ask_question,
                    inputs=[repo_selector, question, top_k],
                    outputs=[answer_output, evidence_output, qa_status],
                )

            # Tab 3: Manage Repositories
            with gr.TabItem("Manage Repositories"):
                repos_table = gr.Dataframe(
                    headers=["ID", "Repository", "Branch", "Chunks", "Status", "Indexed At"],
                    label="Indexed Repositories",
                    interactive=False,
                )

                with gr.Row():
                    refresh_table_btn = gr.Button("Refresh", size="sm")

                gr.Markdown("### Actions")

                with gr.Row():
                    with gr.Column(scale=2):
                        # The table shows a truncated 8-char ID; handlers
                        # accept a prefix match, so pasting that is enough.
                        action_repo_id = gr.Textbox(
                            label="Repository ID",
                            placeholder="Enter repository ID (or first 8 characters)",
                            info="Copy the ID from the table above",
                        )
                    with gr.Column(scale=1):
                        update_btn = gr.Button("Update (Incremental)", variant="secondary")
                        delete_btn = gr.Button("Delete", variant="stop")

                action_status = gr.Textbox(label="Status", interactive=False, lines=5)

                refresh_table_btn.click(
                    fn=handlers.get_repositories_table,
                    outputs=[repos_table],
                )

                update_btn.click(
                    fn=handlers.index_repository_incremental,
                    inputs=[action_repo_id],
                    outputs=[action_status],
                )

                # Delete also refreshes the table so the removed row disappears.
                delete_btn.click(
                    fn=handlers.delete_repository,
                    inputs=[action_repo_id],
                    outputs=[action_status, repos_table],
                )

        # Load initial data: populate the repository dropdown on page load.
        app.load(
            fn=handlers.get_repositories,
            outputs=[repo_selector],
        )

    return app
"""UI event handlers for Gradio interface."""

import json
from datetime import datetime
from pathlib import Path
from typing import Iterator, Optional

import torch

from coderag.config import get_settings
from coderag.generation.generator import ResponseGenerator
from coderag.indexing.embeddings import EmbeddingGenerator
from coderag.indexing.vectorstore import VectorStore
from coderag.ingestion.chunker import CodeChunker
from coderag.ingestion.filter import FileFilter
from coderag.ingestion.loader import RepositoryLoader
from coderag.ingestion.validator import GitHubURLValidator, ValidationError
from coderag.logging import get_logger
from coderag.models.chunk import Chunk
from coderag.models.document import Document
from coderag.models.query import Query
from coderag.models.repository import Repository, RepositoryStatus

logger = get_logger(__name__)


class UIHandlers:
    """Handlers for Gradio UI events.

    Owns the full ingestion/retrieval pipeline plus a JSON-file registry of
    indexed repositories (``repositories.json`` under the data directory).
    One instance is shared by every UI callback, so state here is process-wide.
    """

    def __init__(self) -> None:
        self.settings = get_settings()
        self.validator = GitHubURLValidator()
        self.loader = RepositoryLoader()
        self.filter = FileFilter()
        self.chunker = CodeChunker()
        self.embedder = EmbeddingGenerator()
        self.vectorstore = VectorStore()
        # Constructed lazily on the first question (see ask_question) so the
        # LLM is not loaded just to index repositories.
        self.generator: Optional[ResponseGenerator] = None

        # Repository metadata storage
        self.repos_file = self.settings.data_dir / "repositories.json"
        self.repositories: dict[str, Repository] = self._load_repositories()

    def _load_repositories(self) -> dict[str, Repository]:
        """Load the repository registry from disk; empty dict on any failure."""
        if self.repos_file.exists():
            try:
                data = json.loads(self.repos_file.read_text())
                return {r["id"]: Repository.from_dict(r) for r in data}
            except Exception as e:
                # Best-effort: a corrupt registry file is logged and treated
                # as empty rather than crashing UI startup.
                logger.error("Failed to load repositories", error=str(e))
        return {}

    def _save_repositories(self) -> None:
        """Persist the registry as pretty-printed JSON (creates parent dirs)."""
        self.repos_file.parent.mkdir(parents=True, exist_ok=True)
        data = [r.to_dict() for r in self.repositories.values()]
        self.repos_file.write_text(json.dumps(data, indent=2))

    # =========================================================================
    # Streaming Methods (Level 1)
    # =========================================================================

    def _document_generator(
        self,
        files: list[Path],
        repo_path: Path,
        repo_id: str,
    ) -> Iterator[Document]:
        """Generate documents one by one without accumulating in memory."""
        for file_path in files:
            try:
                yield Document.from_file(file_path, repo_path, repo_id)
            except Exception as e:
                # Skip unreadable/undecodable files rather than aborting the run.
                logger.warning("Failed to load file", path=str(file_path), error=str(e))

    def _process_batch(self, chunks: list[Chunk]) -> int:
        """Process a batch: embed + store + release memory.

        Returns the number of chunks submitted in this batch.
        """
        if not chunks:
            return 0

        embedded = self.embedder.embed_chunks(chunks, show_progress=False)
        self.vectorstore.add_chunks(embedded)

        # Release memory between batches; empty_cache keeps GPU memory from
        # accumulating across embedding runs when CUDA is in use.
        del embedded
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return len(chunks)

    def _stream_index_repository(
        self,
        documents: Iterator[Document],
        repo_id: str,
        batch_size: int = 100,
        # NOTE(review): ``callable`` here is the builtin, not typing.Callable;
        # the intended signature is progress_callback(total_chunks, doc_count).
        progress_callback: Optional[callable] = None,
    ) -> int:
        """Index using streaming with batches.

        Consumes *documents* lazily, chunks each one, and flushes to the
        embedder/vector store whenever the in-flight batch reaches
        *batch_size*. Returns the total number of chunks indexed.
        """
        total_chunks = 0
        batch: list[Chunk] = []
        doc_count = 0

        for doc in documents:
            doc_count += 1
            for chunk in self.chunker.chunk_document(doc):
                chunk.repo_id = repo_id
                batch.append(chunk)

                if len(batch) >= batch_size:
                    total_chunks += self._process_batch(batch)
                    logger.info("Batch processed", total_so_far=total_chunks, docs_processed=doc_count)
                    if progress_callback:
                        progress_callback(total_chunks, doc_count)
                    batch = []

        # Process final batch
        if batch:
            total_chunks += self._process_batch(batch)
            if progress_callback:
                progress_callback(total_chunks, doc_count)

        return total_chunks

    # =========================================================================
    # Validation Methods (Level 2)
    # =========================================================================

    def _estimate_repo_size(self, files: list[Path]) -> dict:
        """Estimate repository size before indexing.

        Produces a dict of totals plus three boolean verdicts derived from the
        configured ingestion limits. Chunk count is a rough heuristic: one
        chunk per ``chunk_size`` characters of file content.
        """
        total_size_kb = 0
        estimated_chunks = 0
        chunk_size = self.settings.ingestion.chunk_size

        for file_path in files:
            try:
                size_kb = file_path.stat().st_size / 1024
                total_size_kb += size_kb
                # Rough estimate: 1 chunk per chunk_size characters
                estimated_chunks += max(1, int(size_kb * 1024 / chunk_size))
            except Exception:
                # Unstat-able files (e.g. broken symlinks) are simply skipped.
                continue

        return {
            "file_count": len(files),
            "total_size_kb": total_size_kb,
            "estimated_chunks": estimated_chunks,
            "exceeds_file_limit": len(files) > self.settings.ingestion.max_files_per_repo,
            "exceeds_chunk_limit": estimated_chunks > self.settings.ingestion.max_total_chunks,
            "warn_large_repo": len(files) > self.settings.ingestion.warn_files_threshold,
        }

    def _validate_repo_size(self, estimate: dict) -> tuple[bool, str]:
        """Validate if the repository can be indexed.

        Returns ``(ok, message)``: when ``ok`` is False the message is the
        rejection reason; when True it is either "" or a non-fatal size warning.
        """
        if estimate["exceeds_file_limit"]:
            return False, f"Repository exceeds file limit ({estimate['file_count']} > {self.settings.ingestion.max_files_per_repo})"
        if estimate["exceeds_chunk_limit"]:
            return False, f"Repository exceeds chunk limit (~{estimate['estimated_chunks']} > {self.settings.ingestion.max_total_chunks})"

        warning = ""
        if estimate["warn_large_repo"]:
            warning = f"Large repository ({estimate['file_count']} files, ~{estimate['estimated_chunks']} chunks). Processing may take several minutes."

        return True, warning

    # =========================================================================
    # Incremental Indexing Methods (Level 3)
    # =========================================================================

    def _get_current_commit(self, repo_path: Path) -> str:
        """Get the SHA of the current commit (HEAD) of the local clone."""
        # Imported locally, presumably to defer loading GitPython — confirm.
        from git import Repo
        git_repo = Repo(repo_path)
        return git_repo.head.commit.hexsha

    def _get_changed_files(
        self,
        repo_path: Path,
        last_commit: str,
        current_commit: str,
    ) -> tuple[set[str], set[str], set[str]]:
        """Get files that were added, modified, or deleted between commits.

        Renames are treated as a delete of the old path plus an add of the new
        one, so the caller's chunk bookkeeping stays path-based.
        """
        from git import Repo
        git_repo = Repo(repo_path)

        diff = git_repo.commit(last_commit).diff(current_commit)

        added: set[str] = set()
        modified: set[str] = set()
        deleted: set[str] = set()

        for d in diff:
            if d.new_file:
                added.add(d.b_path)
            elif d.deleted_file:
                deleted.add(d.a_path)
            # NOTE(review): GitPython documents ``renamed_file``; ``renamed``
            # is the older alias — confirm against the pinned version.
            elif d.renamed:
                deleted.add(d.a_path)
                added.add(d.b_path)
            else:
                # Plain modification; b_path can be None in edge cases, so
                # fall back to a_path.
                modified.add(d.b_path or d.a_path)

        return added, modified, deleted

    def index_repository_incremental(self, repo_id: str) -> str:
        """Update only modified files since last indexing (incremental update).

        Accepts a full ID or any prefix of one; returns a human-readable
        status string for the UI.
        """
        # Find repository by full or partial ID
        # NOTE(review): an empty repo_id matches the first repository, since
        # startswith("") is always True — consider rejecting empty input.
        found_repo = None
        for rid, repo in self.repositories.items():
            if rid == repo_id or rid.startswith(repo_id):
                found_repo = repo
                break

        if not found_repo:
            return "Repository not found"

        repo = found_repo

        if not repo.last_commit:
            return "No previous indexing found. Please re-index the full repository."

        if not repo.clone_path or not Path(repo.clone_path).exists():
            return "Repository cache not found. Please re-index."

        try:
            repo_path = Path(repo.clone_path)

            # Update local repository
            # NOTE(review): reaches into a private loader method — consider a
            # public update API on RepositoryLoader.
            logger.info("Updating local repository", repo_id=repo.id)
            self.loader._update_repository(repo_path, repo.branch, None)

            current_commit = self._get_current_commit(repo_path)

            if current_commit == repo.last_commit:
                return "Repository is already up to date."

            added, modified, deleted = self._get_changed_files(
                repo_path, repo.last_commit, current_commit
            )

            logger.info(
                "Changes detected",
                added=len(added),
                modified=len(modified),
                deleted=len(deleted),
            )

            # Delete chunks for deleted/modified files (modified files are
            # fully re-indexed below, so their old chunks must go first).
            for file_path in deleted | modified:
                self.vectorstore.delete_file_chunks(repo.id, file_path)

            # Index new/modified files
            files_to_index = []
            file_filter = FileFilter()
            for file_path in added | modified:
                full_path = repo_path / file_path
                if full_path.exists() and file_filter.should_include(full_path, repo_path):
                    files_to_index.append(full_path)

            new_chunks = 0
            if files_to_index:
                batch_size = self.settings.ingestion.batch_size
                doc_generator = self._document_generator(files_to_index, repo_path, repo.id)
                new_chunks = self._stream_index_repository(doc_generator, repo.id, batch_size)

            # Update metadata (chunk_count is re-read from the store so it
            # reflects deletions as well as additions).
            repo.last_commit = current_commit
            repo.indexed_at = datetime.now()
            repo.chunk_count = self.vectorstore.get_repo_chunk_count(repo.id)
            self._save_repositories()

            return (
                f"Incremental update complete:\n"
                f"- Added/Modified: {len(added | modified)} files\n"
                f"- Deleted: {len(deleted)} files\n"
                f"- New chunks: {new_chunks}\n"
                f"- Total chunks: {repo.chunk_count}"
            )

        except Exception as e:
            logger.error("Incremental indexing failed", error=str(e), exc_info=True)
            return f"Error: {str(e)}"

    def index_repository(
        self,
        url: str,
        branch: str = "",
        include_patterns: str = "",
        exclude_patterns: str = "",
    ) -> Iterator[str]:
        """Index a GitHub repository with progress updates.

        Generator: yields human-readable status strings that Gradio streams
        into the status textbox. Pattern arguments are comma-separated globs;
        empty strings fall back to the filter's defaults.
        """
        try:
            # Validate URL (sync version, skip accessibility check for UI)
            yield "Validating repository URL..."
            logger.info("Starting indexing", url=url, branch=branch)
            repo_info = self.validator.parse_url(url)
            branch = branch.strip() or repo_info.branch or "main"

            # Create repository record
            repo = Repository(
                url=repo_info.url,
                branch=branch,
                status=RepositoryStatus.CLONING,
            )
            self.repositories[repo.id] = repo

            # Clone repository
            yield f"Cloning {repo_info.full_name} (branch: {branch})..."
            logger.info("Cloning repository", url=url, branch=branch)
            repo_path = self.loader.clone_repository(repo_info, branch)
            repo.clone_path = repo_path
            repo.status = RepositoryStatus.INDEXING

            # Setup filter with custom patterns; empty input -> None so the
            # FileFilter applies its built-in defaults.
            include = [p.strip() for p in include_patterns.split(",") if p.strip()] or None
            exclude = [p.strip() for p in exclude_patterns.split(",") if p.strip()] or None
            file_filter = FileFilter(include_patterns=include, exclude_patterns=exclude)

            # Process files
            yield "Scanning files..."
            logger.info("Filtering files", repo_path=str(repo_path))
            files = list(file_filter.filter_files(repo_path))
            file_count = len(files)
            logger.info("Files to process", count=file_count)

            # Validate repository size (Level 2)
            estimate = self._estimate_repo_size(files)
            can_proceed, message = self._validate_repo_size(estimate)

            if not can_proceed:
                repo.status = RepositoryStatus.ERROR
                repo.error_message = message
                self._save_repositories()
                yield f"Error: {message}"
                return

            if message:
                logger.warning(message)
                yield f"Warning: {message}"

            yield f"Found {file_count} files to index (~{estimate['estimated_chunks']} chunks)"

            # Delete existing chunks for this repo (re-indexing)
            logger.info("Deleting previous chunks for repo", repo_id=repo.id)
            self.vectorstore.delete_repo_chunks(repo.id)

            # Stream indexing with batches and progress updates
            yield f"Indexing... (0/{file_count} files, 0 chunks)"
            logger.info("Starting streaming indexing", file_count=file_count)
            batch_size = self.settings.ingestion.batch_size
            doc_generator = self._document_generator(files, repo_path, repo.id)

            # Process with progress updates. This duplicates
            # _stream_index_repository inline because a generator must yield
            # its own progress strings (a callback cannot yield for it).
            total_chunks = 0
            batch: list[Chunk] = []
            doc_count = 0

            for doc in doc_generator:
                doc_count += 1
                for chunk in self.chunker.chunk_document(doc):
                    chunk.repo_id = repo.id
                    batch.append(chunk)

                    if len(batch) >= batch_size:
                        total_chunks += self._process_batch(batch)
                        batch = []
                        # Yield progress update
                        yield f"Indexing... ({doc_count}/{file_count} files, {total_chunks} chunks)"

            # Process final batch
            if batch:
                total_chunks += self._process_batch(batch)

            logger.info("Streaming indexing complete", chunk_count=total_chunks)

            # Save current commit for incremental updates (Level 3);
            # best-effort — a failure here only disables incremental updates.
            try:
                repo.last_commit = self._get_current_commit(repo_path)
            except Exception:
                repo.last_commit = None

            # Update repository status
            repo.chunk_count = total_chunks
            repo.indexed_at = datetime.now()
            repo.status = RepositoryStatus.READY
            self._save_repositories()

            result = f"Successfully indexed {repo_info.full_name}\n{file_count} files processed\n{total_chunks} chunks indexed"
            logger.info("Indexing complete", result=result)
            yield result

        except ValidationError as e:
            logger.error("Validation error", error=str(e))
            yield f"Validation error: {str(e)}"
        except Exception as e:
            logger.error("Indexing failed", error=str(e), exc_info=True)
            # "repo" only exists once the record was created; guard so
            # pre-record failures (e.g. URL parsing) don't NameError here.
            if "repo" in locals():
                repo.status = RepositoryStatus.ERROR
                repo.error_message = str(e)
                self._save_repositories()
            yield f"Error: {str(e)}"

    def ask_question(
        self,
        repo_id: str,
        question: str,
        top_k: int = 5,
    ) -> tuple[str, str, str]:
        """Ask a question about a repository.

        Returns ``(answer_markdown, evidence_markdown, status)``; on any
        validation or generation failure the first two are empty and the
        status carries the message.
        """
        if not repo_id:
            return "", "", "Please select a repository"

        if not question.strip():
            return "", "", "Please enter a question"

        try:
            # Lazy load generator (first question pays the model-load cost)
            if self.generator is None:
                self.generator = ResponseGenerator()

            query = Query(
                question=question.strip(),
                repo_id=repo_id,
                top_k=int(top_k),
            )

            response = self.generator.generate(query)

            # Format answer
            answer_md = f"## Answer\n\n{response.answer}"
            if response.citations:
                answer_md += "\n\n### Citations\n"
                for citation in response.citations:
                    answer_md += f"- `{citation}`\n"

            # Format evidence
            evidence_md = response.format_evidence()

            status = "Grounded" if response.grounded else "Not grounded (no citations)"

            return answer_md, evidence_md, status

        except Exception as e:
            logger.error("Question failed", error=str(e))
            return "", "", f"Error: {str(e)}"

    def get_repositories(self):
        """Get list of repositories for dropdown.

        Returns a ``gr.update`` whose choices are (label, repo_id) pairs for
        READY repositories only.
        """
        # Local import keeps gradio out of this module's import-time deps.
        import gradio as gr
        choices = []
        for repo in self.repositories.values():
            if repo.status == RepositoryStatus.READY:
                label = f"{repo.full_name} ({repo.chunk_count} chunks)"
                choices.append((label, repo.id))
        return gr.update(choices=choices)

    def get_repositories_table(self) -> list[list]:
        """Get repositories as table data (IDs truncated to 8 chars for display)."""
        rows = []
        for repo in self.repositories.values():
            rows.append([
                repo.id[:8],
                repo.full_name,
                repo.branch,
                repo.chunk_count,
                repo.status.value,
                repo.indexed_at.strftime("%Y-%m-%d %H:%M") if repo.indexed_at else "-",
            ])
        return rows

    def delete_repository(self, repo_id: str) -> tuple[str, list[list]]:
        """Delete a repository.

        Removes its chunks from the vector store, its cached clone, and its
        registry entry. Returns ``(status_message, refreshed_table_rows)``.
        """
        repo_id = repo_id.strip()

        # Find by full or partial ID
        # NOTE(review): an empty (post-strip) ID matches the first repository
        # via startswith("") — consider rejecting empty input explicitly.
        found_repo = None
        for rid, repo in self.repositories.items():
            if rid == repo_id or rid.startswith(repo_id):
                found_repo = repo
                break

        if not found_repo:
            return "Repository not found", self.get_repositories_table()

        try:
            # Delete from vector store
            self.vectorstore.delete_repo_chunks(found_repo.id)

            # Delete cached repo
            # NOTE(review): builds an ad-hoc object with only .owner/.name via
            # type(); fragile if delete_cache starts using other RepoInfo
            # attributes — consider accepting owner/name directly.
            self.loader.delete_cache(
                type("RepoInfo", (), {"owner": found_repo.owner, "name": found_repo.name})()
            )

            # Remove from records
            del self.repositories[found_repo.id]
            self._save_repositories()

            return f"Deleted {found_repo.full_name}", self.get_repositories_table()

        except Exception as e:
            logger.error("Delete failed", error=str(e))
            return f"Error: {str(e)}", self.get_repositories_table()