#!/usr/bin/env python3
"""
Pricing Data Update Script

Updates config/pricing.json with latest LLM provider pricing.
Run periodically to keep cost estimates accurate.

Usage:
    python utils/update_pricing.py [--dry-run] [--output PATH]

Examples:
    # Update pricing.json
    python utils/update_pricing.py

    # Preview changes without writing
    python utils/update_pricing.py --dry-run

    # Write to custom location
    python utils/update_pricing.py --output /path/to/pricing.json
"""

import argparse
import json
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict

# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Pricing data (as of 2026-01-19)
# Update these values periodically from provider documentation
PROVIDER_PRICING = {
    # Timezone-aware UTC so the timestamp is unambiguous regardless of the
    # machine that ran the update (naive local time was ambiguous).
    "last_updated": datetime.now(timezone.utc).isoformat(),
    "version": "2.0",
    "providers": {
        "openai": {
            "models": {
                "gpt-4-turbo": {
                    "input_cost_per_1m_tokens": 10.00,
                    "output_cost_per_1m_tokens": 30.00,
                    "context_window": 128000,
                    "notes": "GPT-4 Turbo - Latest model",
                },
                "gpt-4": {
                    "input_cost_per_1m_tokens": 30.00,
                    "output_cost_per_1m_tokens": 60.00,
                    "context_window": 8192,
                    "notes": "GPT-4 Original",
                },
                "gpt-3.5-turbo": {
                    "input_cost_per_1m_tokens": 0.50,
                    "output_cost_per_1m_tokens": 1.50,
                    "context_window": 16385,
                    "notes": "GPT-3.5 Turbo",
                },
            },
            "default_model": "gpt-3.5-turbo",
        },
        "anthropic": {
            "models": {
                "claude-3-5-sonnet-20241022": {
                    "input_cost_per_1m_tokens": 3.00,
                    "output_cost_per_1m_tokens": 15.00,
                    "context_window": 200000,
                    "notes": "Claude 3.5 Sonnet - Best overall",
                },
                "claude-3-opus-20240229": {
                    "input_cost_per_1m_tokens": 15.00,
                    "output_cost_per_1m_tokens": 75.00,
                    "context_window": 200000,
                    "notes": "Claude 3 Opus - Highest intelligence",
                },
                "claude-3-haiku-20240307": {
                    "input_cost_per_1m_tokens": 0.25,
                    "output_cost_per_1m_tokens": 1.25,
                    "context_window": 200000,
                    "notes": "Claude 3 Haiku - Fastest, most affordable",
                },
            },
            "default_model": "claude-3-5-sonnet-20241022",
        },
        "huggingface": {
            # HF routes to multiple backends, so costs are ranges, not fixed
            # per-model prices like the providers above.
            "routing_policies": {
                ":cheapest": {
                    "estimated_cost_range": {
                        "min_per_1m_tokens": 0.00,
                        "max_per_1m_tokens": 0.20,
                    },
                    "free_tier_available": True,
                    "notes": "Automatically selects lowest-cost provider, often free tier",
                },
                ":fastest": {
                    "estimated_cost_range": {
                        "min_per_1m_tokens": 0.50,
                        "max_per_1m_tokens": 3.00,
                    },
                    "free_tier_available": False,
                    "notes": "Selects highest-quality models, typically paid tier",
                },
                "auto": {
                    "estimated_cost_range": {
                        "min_per_1m_tokens": 0.05,
                        "max_per_1m_tokens": 0.20,
                    },
                    "free_tier_available": True,
                    "notes": "Default routing, usually Llama 3.3 70B",
                },
            },
            "providers": {
                "groq": {
                    "estimated_cost_per_1m_tokens": 0.00,
                    "free_tier": True,
                    "notes": "Ultra-fast inference, free tier available",
                },
                "together": {
                    "estimated_cost_per_1m_tokens": 0.10,
                    "free_tier": True,
                    "notes": "Good balance of cost and quality, some free models",
                },
                "replicate": {
                    "estimated_cost_per_1m_tokens": 0.15,
                    "free_tier": False,
                    "notes": "Wide model selection",
                },
                "cerebras": {
                    "estimated_cost_per_1m_tokens": 0.20,
                    "free_tier": False,
                    "notes": "High-performance inference",
                },
                "fireworks": {
                    "estimated_cost_per_1m_tokens": 0.10,
                    "free_tier": False,
                    "notes": "Fast and cost-effective",
                },
                "deepinfra": {
                    "estimated_cost_per_1m_tokens": 0.08,
                    "free_tier": False,
                    "notes": "Budget-friendly option",
                },
            },
            "models": {
                "meta-llama/Llama-3.3-70B-Instruct": {
                    "estimated_cost_per_1m_tokens": 0.10,
                    "free_tier_available": True,
                    "context_window": 128000,
                    "notes": "Default model for auto routing",
                }
            },
            "default_routing": "auto",
        },
        "qwen": {
            "models": {
                "qwen-turbo": {
                    "input_cost_per_1m_tokens": 0.20,
                    "output_cost_per_1m_tokens": 0.60,
                    "context_window": 8192,
                    "notes": "Qwen Turbo via DashScope",
                }
            },
            "default_model": "qwen-turbo",
        },
    },
    "free_tier_detection": {
        "enabled": True,
        "providers": ["huggingface"],
        "keywords": ["free", "groq", "together"],
        "notes": "Automatically detect and display $0.00 for free tier usage",
    },
    "notes": [
        "Prices are estimates and may vary based on provider discounts, credits, and billing variations",
        "Always verify final costs with provider billing",
        "Update this file periodically using: python utils/update_pricing.py",
        "Free tier availability may change - check provider documentation",
    ],
}


def load_current_pricing(path: Path) -> Dict:
    """Load current pricing from *path*.

    Returns an empty dict (rather than raising) when the file is missing
    or contains invalid JSON, so callers can treat both as "no prior data".
    """
    try:
        # Explicit UTF-8: JSON is UTF-8 by spec; don't rely on the locale.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        logger.warning("Pricing file not found: %s", path)
        return {}
    except json.JSONDecodeError as e:
        logger.error("Invalid JSON in pricing file: %s", e)
        return {}


def compare_pricing(old: Dict, new: Dict) -> Dict:
    """Compare old and new pricing, return a dict of change descriptions.

    Only version bumps and provider additions/removals are reported;
    per-model price changes are not diffed.
    """
    changes = {
        "added": [],
        "removed": [],
        "modified": [],
    }

    # Check for version change
    old_version = old.get("version", "unknown")
    new_version = new.get("version", "unknown")
    if old_version != new_version:
        changes["modified"].append(f"Version: {old_version} → {new_version}")

    # Check for provider changes (set difference in each direction)
    old_providers = set(old.get("providers", {}).keys())
    new_providers = set(new.get("providers", {}).keys())
    changes["added"].extend(
        [f"Provider: {p}" for p in new_providers - old_providers]
    )
    changes["removed"].extend(
        [f"Provider: {p}" for p in old_providers - new_providers]
    )

    return changes


def format_changes(changes: Dict) -> str:
    """Format a compare_pricing() result as a readable multi-line string."""
    lines = []

    if changes["added"]:
        lines.append("Added:")
        for item in changes["added"]:
            lines.append(f"  + {item}")

    if changes["removed"]:
        lines.append("Removed:")
        for item in changes["removed"]:
            lines.append(f"  - {item}")

    if changes["modified"]:
        lines.append("Modified:")
        for item in changes["modified"]:
            lines.append(f"  ✎ {item}")

    if not any(changes.values()):
        lines.append("No changes detected")

    return "\n".join(lines)


def write_pricing(pricing: Dict, path: Path, dry_run: bool = False):
    """Write *pricing* to *path* as pretty-printed JSON.

    With dry_run=True, logs a preview and writes nothing.
    Raises on any write failure after logging it.
    """
    if dry_run:
        logger.info("DRY RUN - Would write to: %s", path)
        logger.info("Preview:")
        logger.info(json.dumps(pricing, indent=2))
        return

    try:
        # Create directory if needed
        path.parent.mkdir(parents=True, exist_ok=True)

        # Write with pretty formatting; explicit UTF-8 and a trailing
        # newline so the file is a well-formed POSIX text file.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(pricing, f, indent=2, ensure_ascii=False)
            f.write("\n")

        logger.info("✅ Pricing updated: %s", path)
    except Exception as e:
        logger.error("❌ Failed to write pricing: %s", e)
        raise


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Update LLM provider pricing data",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Update pricing.json
  python utils/update_pricing.py

  # Preview changes without writing
  python utils/update_pricing.py --dry-run

  # Write to custom location
  python utils/update_pricing.py --output /path/to/pricing.json
""",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview changes without writing to file",
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Output path for pricing.json (default: config/pricing.json)",
    )

    args = parser.parse_args()

    # Determine output path
    if args.output:
        output_path = args.output
    else:
        # Default to config/pricing.json relative to script location
        script_dir = Path(__file__).parent
        output_path = script_dir.parent / "config" / "pricing.json"

    logger.info("=" * 60)
    logger.info("LLM Provider Pricing Update")
    logger.info("=" * 60)
    logger.info("Output path: %s", output_path)
    logger.info("Dry run: %s", args.dry_run)
    logger.info("")

    # Load current pricing
    current_pricing = load_current_pricing(output_path)

    # Compare with new pricing
    changes = compare_pricing(current_pricing, PROVIDER_PRICING)
    logger.info("Changes:")
    logger.info(format_changes(changes))
    logger.info("")

    # Write new pricing
    write_pricing(PROVIDER_PRICING, output_path, dry_run=args.dry_run)

    if not args.dry_run:
        logger.info("")
        logger.info("✅ Pricing update complete!")
        logger.info("Updated: %s", output_path)
        logger.info("Last updated: %s", PROVIDER_PRICING["last_updated"])


if __name__ == "__main__":
    main()