"""
FastAPI application for ClientSphere RAG Backend.
Provides endpoints for knowledge base management and chat.
"""
from fastapi import FastAPI, File, UploadFile, HTTPException, Form, BackgroundTasks, Request, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from pathlib import Path
import shutil
import uuid
from datetime import datetime
from typing import Optional
import logging

from app.config import settings
from app.middleware.auth import get_auth_context, require_auth
from app.middleware.rate_limit import (
    limiter,
    get_tenant_rate_limit_key,
    RateLimitExceeded,
    _rate_limit_exceeded_handler
)
from app.models.schemas import (
    UploadResponse,
    ChatRequest,
    ChatResponse,
    KnowledgeBaseStats,
    HealthResponse,
    DocumentStatus,
    Citation,
)
from app.models.billing_schemas import (
    UsageResponse,
    PlanLimitsResponse,
    CostReportResponse,
    SetPlanRequest
)
from app.rag.ingest import parser
from app.rag.chunking import chunker
from app.rag.embeddings import get_embedding_service
from app.rag.vectorstore import get_vector_store
from app.rag.retrieval import get_retrieval_service
from app.rag.answer import get_answer_service
from app.db.database import get_db, init_db
from app.billing.quota import check_quota, ensure_tenant_exists
from app.billing.usage_tracker import track_usage

# Configure the root logger at INFO level at import time and create this
# module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
# The title is read from settings; description and version are fixed here.
app = FastAPI(
    title=settings.APP_NAME,
    description="RAG-based customer support chatbot API",
    version="1.0.0",
)

# Initialize database on startup
@app.on_event("startup")
async def startup_event():
    """
    Initialize database on application startup.

    Registered on the app's "startup" event: calls init_db() and then logs
    that initialization finished.
    """
    init_db()
    logger.info("Database initialized")

# Configure CORS - SECURITY: Restrict in production
# ALLOWED_ORIGINS is a comma-separated string. An empty value or any "*"
# entry means "allow every origin", which is only appropriate for development.
_configured_origins = [
    origin.strip()
    for origin in settings.ALLOWED_ORIGINS.split(",")
    if origin.strip()
]
if not _configured_origins or "*" in _configured_origins:
    allowed_origins = ["*"]
else:
    allowed_origins = _configured_origins

_allow_all_origins = allowed_origins == ["*"]
if _allow_all_origins and settings.ENV == "prod":
    logger.warning(
        "CORS allows all origins in production; set ALLOWED_ORIGINS to an explicit list"
    )

logger.info(f"CORS configured with origins: {allowed_origins}")

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    # Credentialed cross-origin requests must not be combined with a wildcard
    # origin, so credentials are only enabled for an explicit origin list.
    allow_credentials=not _allow_all_origins,
    allow_methods=["GET", "POST", "DELETE", "OPTIONS"],  # Include OPTIONS for preflight
    allow_headers=["Content-Type", "Authorization", "X-Tenant-Id", "X-User-Id"],  # Include auth headers
)

# Configure rate limiting
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Add exception handler for validation errors
# Only one handler can be registered per exception class, so this is the single
# RequestValidationError handler for the application.
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """
    Handle request validation errors with detailed logging.

    Logs the validation errors and the request body, then returns a 422
    response whose JSON carries the errors under "detail" and the request
    body (decoded as UTF-8 text) under "body".

    Request headers are deliberately not logged: they can carry the
    Authorization token.
    """
    from fastapi.encoders import jsonable_encoder

    # Error entries may hold objects that are not JSON-serializable as-is.
    errors = jsonable_encoder(exc.errors())

    try:
        raw_body = await request.body()
    except RuntimeError:
        # NOTE(review): presumably raised when the body stream was already
        # consumed (e.g. by form parsing) - confirm against the Starlette version in use.
        raw_body = b""

    # Decode once so the log line and the response show readable text instead
    # of a bytes repr; undecodable bytes are replaced rather than dropped.
    body_text = raw_body.decode("utf-8", errors="replace")

    logger.error(f"Request validation error: {errors}")
    logger.error(f"Request body: {body_text}")
    return JSONResponse(
        status_code=422,
        content={"detail": errors, "body": body_text}
    )


# ============== Health & Status Endpoints ==============

@app.get("/", response_model=HealthResponse)
async def root():
    """
    Root endpoint with basic info.

    Always reports status "ok" and vector_db_connected=True without probing
    anything; llm_configured reflects whether a Gemini or OpenAI API key is set.
    """
    has_llm_key = bool(settings.GEMINI_API_KEY or settings.OPENAI_API_KEY)
    return HealthResponse(
        status="ok",
        version="1.0.0",
        vector_db_connected=True,
        llm_configured=has_llm_key,
    )


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """
    Health check endpoint.

    Probes the vector store by requesting its stats. Returns status "healthy"
    when the probe succeeds and "unhealthy" (with vector_db_connected=False)
    when it raises. llm_configured always reflects whether a Gemini or OpenAI
    API key is set, independently of the vector store probe.
    """
    llm_configured = bool(settings.GEMINI_API_KEY or settings.OPENAI_API_KEY)

    try:
        # The stats themselves are not needed; the call only proves reachability.
        get_vector_store().get_stats()
        vector_db_connected = True
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        vector_db_connected = False

    return HealthResponse(
        status="healthy" if vector_db_connected else "unhealthy",
        version="1.0.0",
        vector_db_connected=vector_db_connected,
        llm_configured=llm_configured
    )


@app.get("/health/live")
async def liveness():
    """Kubernetes liveness probe: unconditionally reports the process as alive."""
    return dict(status="alive")


@app.get("/health/ready")
async def readiness():
    """
    Kubernetes readiness probe - checks dependencies.

    Ready means the vector store answers a stats request and an LLM API key
    (Gemini or OpenAI) is configured. Raises HTTP 503 carrying the individual
    check results when either condition fails.
    """
    # Probe the vector DB; any exception counts as "not connected".
    try:
        get_vector_store().get_stats()
    except Exception as e:
        logger.warning(f"Vector DB check failed: {e}")
        vector_db_ok = False
    else:
        vector_db_ok = True

    checks = {
        "vector_db": vector_db_ok,
        "llm_configured": bool(settings.GEMINI_API_KEY or settings.OPENAI_API_KEY),
    }

    # Every check has to pass for the service to count as ready.
    if not all(checks.values()):
        raise HTTPException(status_code=503, detail={"status": "not_ready", "checks": checks})
    return {"status": "ready", "checks": checks}


# ============== Knowledge Base Endpoints ==============

@app.post("/kb/upload", response_model=UploadResponse)
@limiter.limit("20/hour", key_func=get_tenant_rate_limit_key)
async def upload_document(
    background_tasks: BackgroundTasks,
    request: Request,
    file: UploadFile = File(...),
    tenant_id: Optional[str] = Form(None),  # Optional in dev, ignored in prod
    user_id: Optional[str] = Form(None),  # Optional in dev, ignored in prod
    kb_id: str = Form(...)
):
    """
    Upload a document to the knowledge base.

    - Saves file to disk
    - Schedules parsing, chunking, embedding and vector-store insertion as a
      background task (see process_document)

    In production both tenant_id and user_id are taken from the authenticated
    context and any form values are ignored. In other environments tenant_id
    must be supplied in the form.

    Raises:
        HTTPException 403: production request whose auth context has no tenant_id.
        HTTPException 400: missing tenant_id (non-prod), missing file name,
            unsupported file extension, or file larger than MAX_FILE_SIZE_MB.
        HTTPException 500: the file could not be written to disk.
    """
    # SECURITY: Extract tenant_id and user_id from auth token in production
    if settings.ENV == "prod":
        auth_context = await require_auth(request)
        tenant_id = auth_context.get("tenant_id")
        # Never trust a client-supplied user_id in production.
        user_id = auth_context.get("user_id")
        if not tenant_id:
            raise HTTPException(
                status_code=403,
                detail="tenant_id must come from authentication token in production mode"
            )
    elif not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required"
        )

    # SECURITY: keep only the final path component of the client-supplied
    # name (treating backslashes as separators too) so it cannot point
    # outside the uploads directory. The filename may also be absent.
    safe_name = Path((file.filename or "").replace("\\", "/")).name
    if not safe_name:
        raise HTTPException(
            status_code=400,
            detail="A file name is required"
        )

    # Validate file type
    file_ext = Path(safe_name).suffix.lower()
    if file_ext not in parser.SUPPORTED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file_ext}. Supported: {parser.SUPPORTED_EXTENSIONS}"
        )

    # Validate file size (SECURITY): measure by seeking to the end, then rewind.
    file.file.seek(0, 2)
    file_size = file.file.tell()
    file.file.seek(0)
    max_size_bytes = settings.MAX_FILE_SIZE_MB * 1024 * 1024
    if file_size > max_size_bytes:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE_MB}MB"
        )

    # Generate document ID
    doc_id = f"{tenant_id}_{kb_id}_{uuid.uuid4().hex[:8]}"

    # Save file to uploads directory. tenant_id / kb_id are part of doc_id and
    # may be client-controlled, so the combined name is reduced to a single
    # path component before joining it to the uploads directory.
    upload_path = settings.UPLOADS_DIR / Path(f"{doc_id}_{safe_name}").name
    try:
        with open(upload_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        logger.info(f"Saved file: {upload_path}")
    except Exception as e:
        logger.error(f"Error saving file: {e}")
        raise HTTPException(status_code=500, detail="Failed to save file")

    # Process document in background
    background_tasks.add_task(
        process_document,
        upload_path,
        tenant_id,  # CRITICAL: Multi-tenant isolation
        user_id,
        kb_id,
        safe_name,
        doc_id
    )

    # chunks_created is 0 because processing has only been scheduled.
    return UploadResponse(
        success=True,
        message="Document upload started. Processing in background.",
        document_id=doc_id,
        file_name=safe_name,
        chunks_created=0,
        status=DocumentStatus.PROCESSING
    )


async def process_document(
    file_path: Path,
    tenant_id: str,  # CRITICAL: Multi-tenant isolation
    user_id: str,
    kb_id: str,
    original_filename: str,
    document_id: str
):
    """
    Background task to process an uploaded document.

    Parses the file, splits the text into chunks, builds per-chunk metadata,
    embeds the chunk texts and stores everything in the vector store. Returns
    early (after a warning) when chunking yields nothing.

    Args:
        file_path: Location of the saved upload on disk.
        tenant_id: Tenant that owns the document; written into every chunk's metadata.
        user_id: User recorded in every chunk's metadata.
        kb_id: Knowledge base recorded in every chunk's metadata.
        original_filename: File name recorded in metadata and used in log lines.
        document_id: Identifier recorded in every chunk's metadata.

    Raises:
        Exception: any failure is logged with its traceback and re-raised.
    """
    # NOTE(review): the parse/embed/store calls below are invoked synchronously
    # inside this coroutine - confirm they are cheap enough not to stall the
    # event loop, or move them to a worker thread.
    try:
        logger.info(f"Processing document: {original_filename}")

        # Parse document
        parsed_doc = parser.parse(file_path)
        logger.info(f"Parsed document: {len(parsed_doc.text)} characters")

        # Chunk document
        chunks = chunker.chunk_text(
            parsed_doc.text,
            page_numbers=parsed_doc.page_map
        )
        logger.info(f"Created {len(chunks)} chunks")

        if not chunks:
            logger.warning(f"No chunks created from {original_filename}")
            return

        # Create metadata for each chunk; ids and texts are kept in the same order.
        total_chunks = len(chunks)
        metadatas = [
            chunker.create_chunk_metadata(
                chunk=chunk,
                tenant_id=tenant_id,  # CRITICAL: Multi-tenant isolation
                kb_id=kb_id,
                user_id=user_id,
                file_name=original_filename,
                file_type=parsed_doc.file_type,
                total_chunks=total_chunks,
                document_id=document_id
            )
            for chunk in chunks
        ]
        chunk_ids = [metadata["chunk_id"] for metadata in metadatas]
        chunk_texts = [chunk.content for chunk in chunks]

        # Generate embeddings
        embedding_service = get_embedding_service()
        embeddings = embedding_service.embed_texts(chunk_texts)
        logger.info(f"Generated {len(embeddings)} embeddings")

        # Store in vector database
        vector_store = get_vector_store()
        vector_store.add_documents(
            documents=chunk_texts,
            embeddings=embeddings,
            metadatas=metadatas,
            ids=chunk_ids
        )

        logger.info(f"Successfully processed {original_filename}: {len(chunks)} chunks stored")

    except Exception as e:
        # This runs detached from the request, so the traceback in the log is
        # the main record of what went wrong.
        logger.error(f"Error processing document {original_filename}: {e}", exc_info=True)
        raise


@app.get("/kb/stats", response_model=KnowledgeBaseStats)
async def get_kb_stats(
    request: Request,
    tenant_id: Optional[str] = None,  # Optional in dev, ignored in prod
    kb_id: Optional[str] = None,
    user_id: Optional[str] = None  # Optional in dev, ignored in prod
):
    """
    Get statistics for a knowledge base.

    In production tenant_id and user_id always come from the auth context.
    In other environments the query parameters win and the auth context is
    the fallback. kb_id is required in every environment.

    Raises:
        HTTPException 403: production request without tenant_id/user_id in the auth context.
        HTTPException 400: tenant_id, kb_id or user_id is missing.
        HTTPException 500: the vector store lookup failed.
    """
    # SECURITY: Get tenant_id and user_id from auth context
    auth_context = await get_auth_context(request)
    tenant_id_from_auth = auth_context.get("tenant_id")
    user_id_from_auth = auth_context.get("user_id")

    if settings.ENV == "prod":
        if not tenant_id_from_auth or not user_id_from_auth:
            raise HTTPException(
                status_code=403,
                detail="tenant_id and user_id must come from authentication token in production mode"
            )
        tenant_id = tenant_id_from_auth
        user_id = user_id_from_auth
    else:
        tenant_id = tenant_id or tenant_id_from_auth
        user_id = user_id or user_id_from_auth

    # Checked in every environment so a production request without kb_id is
    # rejected instead of reaching the vector store with kb_id=None.
    if not tenant_id or not kb_id or not user_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id, kb_id, and user_id are required"
        )

    try:
        vector_store = get_vector_store()
        stats = vector_store.get_stats(tenant_id=tenant_id, kb_id=kb_id, user_id=user_id)
        file_names = stats.get("file_names", [])

        return KnowledgeBaseStats(
            tenant_id=tenant_id,  # CRITICAL: Multi-tenant isolation
            kb_id=kb_id,
            user_id=user_id,
            total_documents=len(file_names),
            total_chunks=stats.get("total_chunks", 0),
            file_names=file_names,
            last_updated=datetime.utcnow()
        )
    except Exception as e:
        logger.error(f"Error getting KB stats: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.delete("/kb/document")
async def delete_document(
    request: Request,
    tenant_id: Optional[str] = None,  # Optional in dev, ignored in prod
    kb_id: Optional[str] = None,
    user_id: Optional[str] = None,  # Optional in dev, ignored in prod
    file_name: Optional[str] = None
):
    """
    Delete a document from the knowledge base.

    Removes every chunk matching tenant_id, kb_id, user_id and file_name.
    In production tenant_id and user_id always come from the auth context;
    elsewhere the query parameters win and the auth context is the fallback.
    kb_id and file_name are required in every environment.

    Raises:
        HTTPException 403: production request without tenant_id/user_id in the auth context.
        HTTPException 400: any of tenant_id, kb_id, user_id, file_name is missing.
        HTTPException 500: the vector store deletion failed.
    """
    # SECURITY: Get tenant_id and user_id from auth context
    auth_context = await get_auth_context(request)
    tenant_id_from_auth = auth_context.get("tenant_id")
    user_id_from_auth = auth_context.get("user_id")

    if settings.ENV == "prod":
        if not tenant_id_from_auth or not user_id_from_auth:
            raise HTTPException(
                status_code=403,
                detail="tenant_id and user_id must come from authentication token in production mode"
            )
        tenant_id = tenant_id_from_auth
        user_id = user_id_from_auth
    else:
        tenant_id = tenant_id or tenant_id_from_auth
        user_id = user_id or user_id_from_auth

    # Checked in every environment: a delete filter must never be built with
    # a missing kb_id or file_name, in production included.
    if not tenant_id or not kb_id or not user_id or not file_name:
        raise HTTPException(
            status_code=400,
            detail="tenant_id, kb_id, user_id, and file_name are required (provide via headers or query params)"
        )

    try:
        vector_store = get_vector_store()
        deleted = vector_store.delete_by_filter({
            "tenant_id": tenant_id,  # CRITICAL: Multi-tenant isolation
            "kb_id": kb_id,
            "user_id": user_id,
            "file_name": file_name
        })

        return {
            "success": True,
            "message": f"Deleted {deleted} chunks",
            "file_name": file_name
        }
    except Exception as e:
        logger.error(f"Error deleting document: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.delete("/kb/clear")
async def clear_kb(
    request: Request,
    tenant_id: Optional[str] = None,  # Optional in dev, ignored in prod
    kb_id: Optional[str] = None,
    user_id: Optional[str] = None  # Optional in dev, ignored in prod
):
    """
    Clear all documents from a knowledge base.

    Removes every chunk matching tenant_id, kb_id and user_id. In production
    tenant_id and user_id always come from the auth context; elsewhere the
    query parameters win and the auth context is the fallback. kb_id is
    required in every environment.

    Raises:
        HTTPException 403: production request without tenant_id/user_id in the auth context.
        HTTPException 400: tenant_id, kb_id or user_id is missing.
        HTTPException 500: the vector store deletion failed.
    """
    # SECURITY: Get tenant_id and user_id from auth context
    auth_context = await get_auth_context(request)
    tenant_id_from_auth = auth_context.get("tenant_id")
    user_id_from_auth = auth_context.get("user_id")

    if settings.ENV == "prod":
        if not tenant_id_from_auth or not user_id_from_auth:
            raise HTTPException(
                status_code=403,
                detail="tenant_id and user_id must come from authentication token in production mode"
            )
        tenant_id = tenant_id_from_auth
        user_id = user_id_from_auth
    else:
        tenant_id = tenant_id or tenant_id_from_auth
        user_id = user_id or user_id_from_auth

    # Checked in every environment: without kb_id the delete filter would not
    # be scoped to a single knowledge base.
    if not tenant_id or not kb_id or not user_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id, kb_id, and user_id are required"
        )

    try:
        vector_store = get_vector_store()
        deleted = vector_store.delete_by_filter({
            "tenant_id": tenant_id,  # CRITICAL: Multi-tenant isolation
            "kb_id": kb_id,
            "user_id": user_id
        })

        return {
            "success": True,
            "message": f"Cleared knowledge base. Deleted {deleted} chunks.",
            "kb_id": kb_id
        }
    except Exception as e:
        logger.error(f"Error clearing KB: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# ============== Chat Endpoints ==============

def _redact_sensitive_headers(headers) -> dict:
    """Return the headers as a dict with credential-bearing values masked for logging."""
    sensitive = {"authorization", "cookie"}
    return {
        name: ("<redacted>" if name.lower() in sensitive else value)
        for name, value in headers.items()
    }


def _chat_failure_response(conversation_id: str, answer: str, metadata: dict) -> ChatResponse:
    """Build the ChatResponse returned when a chat request could not be answered."""
    return ChatResponse(
        success=False,
        answer=answer,
        citations=[],
        confidence=0.0,
        from_knowledge_base=False,
        escalation_suggested=True,
        conversation_id=conversation_id,
        metadata=metadata
    )


@app.post("/chat", response_model=ChatResponse)
@limiter.limit("10/minute", key_func=get_tenant_rate_limit_key)
async def chat(chat_request: ChatRequest, request: Request):
    """
    Process a chat message using RAG.

    - Resolves tenant_id/user_id (auth context only in production; request
      body first, auth context as fallback, otherwise)
    - Checks the tenant's quota before any LLM call
    - Retrieves relevant context from knowledge base
    - Generates answer using LLM
    - Records usage when the answer carries usage info
    - Returns answer with citations

    Non-HTTP failures are reported as a ChatResponse with success=False
    rather than as an HTTP error.

    Raises:
        HTTPException 401: the auth context could not be resolved.
        HTTPException 403: production request without tenant_id/user_id in the auth context.
        HTTPException 400: tenant_id or user_id missing outside production.
        HTTPException 402: the tenant's quota is exhausted.
        HTTPException 500: no database session could be obtained.
    """
    conversation_id = "unknown"
    db_gen = None  # set once a DB session generator has been opened
    try:
        logger.info("=== CHAT REQUEST RECEIVED ===")
        logger.info(f"Request body: tenant_id={chat_request.tenant_id}, user_id={chat_request.user_id}, kb_id={chat_request.kb_id}, question_length={len(chat_request.question)}")
        # Credentials (Authorization / Cookie) must never reach the logs.
        logger.info(f"Request headers: {_redact_sensitive_headers(request.headers)}")

        # SECURITY: Get tenant_id and user_id from auth context
        # In PROD: MUST come from JWT token (never from request body)
        try:
            auth_context = await get_auth_context(request)
        except HTTPException:
            # Keep the status code chosen by the auth layer.
            raise
        except Exception as e:
            logger.error(f"Error getting auth context: {e}", exc_info=True)
            raise HTTPException(status_code=401, detail=f"Authentication error: {str(e)}")

        tenant_id_from_auth = auth_context.get("tenant_id")
        user_id_from_auth = auth_context.get("user_id")

        if settings.ENV == "prod":
            if not tenant_id_from_auth or not user_id_from_auth:
                raise HTTPException(
                    status_code=403,
                    detail="tenant_id and user_id must come from authentication token in production mode"
                )
            # Override request values with auth context (security enforcement)
            chat_request.tenant_id = tenant_id_from_auth
            chat_request.user_id = user_id_from_auth
        else:
            # DEV mode: use from request if provided, otherwise from auth context
            if not chat_request.tenant_id:
                chat_request.tenant_id = tenant_id_from_auth
            if not chat_request.user_id:
                chat_request.user_id = user_id_from_auth
            if not chat_request.tenant_id or not chat_request.user_id:
                raise HTTPException(
                    status_code=400,
                    detail="tenant_id and user_id are required (provide via X-Tenant-Id/X-User-Id headers or request body)"
                )

        # Log without PII in production
        if settings.ENV == "prod":
            logger.info(f"Chat request: tenant={chat_request.tenant_id}, user={chat_request.user_id}, kb={chat_request.kb_id}, q_length={len(chat_request.question)}")
        else:
            logger.info(f"Chat request: tenant={chat_request.tenant_id}, user={chat_request.user_id}, kb={chat_request.kb_id}, q={chat_request.question[:50]}...")

        # Generate conversation ID if not provided
        conversation_id = chat_request.conversation_id or f"conv_{uuid.uuid4().hex[:12]}"

        # Get database session. The generator is kept so it can be closed in
        # the outer finally, which lets get_db run its own cleanup.
        try:
            db_gen = get_db()
            db = next(db_gen)
        except Exception as e:
            logger.error(f"Database connection error: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")

        try:
            # Ensure tenant exists in billing DB
            ensure_tenant_exists(db, chat_request.tenant_id)

            # Check quota BEFORE making LLM call
            has_quota, quota_error = check_quota(db, chat_request.tenant_id)
            if not has_quota:
                logger.warning(f"Quota exceeded for tenant {chat_request.tenant_id}")
                raise HTTPException(
                    status_code=402,
                    detail=quota_error or "AI quota exceeded. Upgrade your plan."
                )

            # Retrieve relevant context
            retrieval_service = get_retrieval_service()
            results, confidence, has_relevant = retrieval_service.retrieve(
                query=chat_request.question,
                tenant_id=chat_request.tenant_id,  # CRITICAL: Multi-tenant isolation
                kb_id=chat_request.kb_id,
                user_id=chat_request.user_id
            )

            logger.info(f"Retrieval results: {len(results)} results, confidence={confidence:.3f}, has_relevant={has_relevant}")

            # Format context for LLM
            context, citations_info = retrieval_service.get_context_for_llm(results)

            logger.info(f"Formatted context length: {len(context)} chars, citations: {len(citations_info)}")

            # Generate answer
            answer_service = get_answer_service()
            answer_result = answer_service.generate_answer(
                question=chat_request.question,
                context=context,
                citations_info=citations_info,
                confidence=confidence,
                has_relevant_results=has_relevant
            )

            # Track usage if LLM was called (usage info present)
            usage_info = answer_result.get("usage")
            if usage_info:
                try:
                    track_usage(
                        db=db,
                        tenant_id=chat_request.tenant_id,
                        user_id=chat_request.user_id,
                        kb_id=chat_request.kb_id,
                        provider=settings.LLM_PROVIDER,
                        model=usage_info.get("model_used", settings.GEMINI_MODEL if settings.LLM_PROVIDER == "gemini" else settings.OPENAI_MODEL),
                        prompt_tokens=usage_info.get("prompt_tokens", 0),
                        completion_tokens=usage_info.get("completion_tokens", 0)
                    )
                except Exception as e:
                    # Don't fail the request if usage tracking fails
                    logger.error(f"Failed to track usage: {e}", exc_info=True)

            # Build metadata, copying over refusal/verifier info when present
            metadata = {
                "chunks_retrieved": len(results),
                "kb_id": chat_request.kb_id
            }
            for optional_key in ("refused", "refusal_reason", "verifier_passed"):
                if optional_key in answer_result:
                    metadata[optional_key] = answer_result[optional_key]

            return ChatResponse(
                success=True,
                answer=answer_result["answer"],
                citations=answer_result["citations"],
                confidence=answer_result["confidence"],
                from_knowledge_base=answer_result["from_knowledge_base"],
                escalation_suggested=answer_result["escalation_suggested"],
                conversation_id=conversation_id,
                refused=answer_result.get("refused", False),
                metadata=metadata
            )

        except ValueError as e:
            # API key or configuration error
            error_msg = str(e)
            logger.error(f"Configuration error: {error_msg}")
            # Compare in lower case on both sides; a mixed-case needle can
            # never match a lower-cased message.
            if "api key" in error_msg.lower():
                return _chat_failure_response(
                    conversation_id,
                    "⚠️ LLM API key not configured. Please set GEMINI_API_KEY in your .env file. Retrieval is working, but answer generation requires an API key.",
                    {"error": error_msg, "error_type": "configuration"}
                )
            return _chat_failure_response(
                conversation_id,
                f"Configuration error: {error_msg}",
                {"error": error_msg}
            )
        except HTTPException:
            # Re-raise HTTP exceptions (they have proper status codes)
            raise
        except Exception as e:
            logger.error(f"Chat error: {e}", exc_info=True)
            logger.error(f"Error type: {type(e).__name__}")
            return _chat_failure_response(
                conversation_id,
                f"I encountered an error processing your request: {str(e)}. Please check the server logs for details.",
                {"error": str(e), "error_type": type(e).__name__}
            )
    except HTTPException:
        # Re-raise HTTP exceptions from outer try block
        raise
    except Exception as e:
        logger.error(f"Outer chat error: {e}", exc_info=True)
        return _chat_failure_response(
            conversation_id,
            f"I encountered an error processing your request: {str(e)}. Please check the server logs for details.",
            {"error": str(e), "error_type": type(e).__name__}
        )
    finally:
        # Release the DB session on every exit path. A cleanup failure is
        # logged but must not replace the response or the original error.
        close_db = getattr(db_gen, "close", None)
        if close_db is not None:
            try:
                close_db()
            except Exception as e:
                logger.error(f"Failed to close database session: {e}", exc_info=True)


# ============== Utility Endpoints ==============

@app.get("/kb/search")
@limiter.limit("30/minute", key_func=get_tenant_rate_limit_key)
async def search_kb(
    request: Request,
    query: str,
    tenant_id: Optional[str] = None,  # Optional in dev, ignored in prod
    kb_id: Optional[str] = None,
    user_id: Optional[str] = None,  # Optional in dev, ignored in prod
    top_k: int = 5
):
    """
    Search the knowledge base without generating an answer.
    Useful for debugging and testing retrieval.

    In production tenant_id and user_id come from the authenticated context;
    elsewhere they must be passed as query parameters. kb_id is required in
    every environment. Each result's content is truncated to 500 characters
    followed by "...".

    Raises:
        HTTPException 403: production request without tenant_id/user_id in the auth context.
        HTTPException 400: tenant_id, kb_id or user_id is missing, or top_k < 1.
        HTTPException 500: retrieval failed.
    """
    # SECURITY: Extract tenant_id from auth token in production
    if settings.ENV == "prod":
        auth_context = await require_auth(request)
        tenant_id = auth_context.get("tenant_id")
        user_id = auth_context.get("user_id")
        if not tenant_id or not user_id:
            raise HTTPException(
                status_code=403,
                detail="tenant_id and user_id must come from authentication token in production mode"
            )

    # Checked in every environment so a production request without kb_id is
    # rejected instead of searching with kb_id=None.
    if not tenant_id or not kb_id or not user_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id, kb_id, and user_id are required"
        )

    if top_k < 1:
        raise HTTPException(
            status_code=400,
            detail="top_k must be at least 1"
        )

    try:
        retrieval_service = get_retrieval_service()
        results, confidence, has_relevant = retrieval_service.retrieve(
            query=query,
            tenant_id=tenant_id,  # CRITICAL: Multi-tenant isolation
            kb_id=kb_id,
            user_id=user_id,
            top_k=top_k
        )

        return {
            "success": True,
            "results": [
                {
                    "chunk_id": r.chunk_id,
                    "content": (r.content[:500] + "...") if len(r.content) > 500 else r.content,
                    "metadata": r.metadata,
                    "similarity_score": r.similarity_score
                }
                for r in results
            ],
            "confidence": confidence,
            "has_relevant_results": has_relevant
        }
    except Exception as e:
        logger.error(f"Search error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# ============== Billing & Usage Endpoints ==============

@app.get("/billing/usage", response_model=UsageResponse)
async def get_usage(
    request: Request,
    range: str = "month",  # "day" or "month"
    year: Optional[int] = None,
    month: Optional[int] = None,
    day: Optional[int] = None
):
    """
    Get usage statistics for the current tenant.

    Args:
        range: "day" or "month" (any value other than "day" is treated as "month")
        year: Year (optional, defaults to current)
        month: Month 1-12 (optional, defaults to current)
        day: Day 1-31 (optional, defaults to current, only for range="day")

    Returns:
        UsageResponse for the requested period; all counters are zero when no
        usage row exists for it.

    Raises:
        HTTPException 403: the auth context has no tenant_id.
        HTTPException 400: year/month/day do not form a valid date.
        HTTPException 500: the usage lookup failed.
    """
    from calendar import monthrange

    # Get tenant from auth
    auth_context = await get_auth_context(request)
    tenant_id = auth_context.get("tenant_id")

    if not tenant_id:
        raise HTTPException(status_code=403, detail="tenant_id required")

    now = datetime.utcnow()
    target_year = year or now.year
    target_month = month or now.month
    is_daily = range == "day"

    # Resolve the period before touching the database so that an impossible
    # date (month 13, 31 February, ...) is a client error, not a server error.
    try:
        if is_daily:
            start_date = end_date = datetime(target_year, target_month, day or now.day)
        else:
            _, last_day = monthrange(target_year, target_month)
            start_date = datetime(target_year, target_month, 1)
            end_date = datetime(target_year, target_month, last_day)
    except (ValueError, OverflowError) as e:
        raise HTTPException(status_code=400, detail=f"Invalid date: {e}")

    db_gen = get_db()
    db = next(db_gen)

    try:
        from app.db.models import UsageDaily, UsageMonthly

        if is_daily:
            usage_row = db.query(UsageDaily).filter(
                UsageDaily.tenant_id == tenant_id,
                UsageDaily.date == start_date
            ).first()
            period = "day"
            if usage_row:
                # Report the stored date for a found daily row.
                start_date = end_date = usage_row.date
        else:
            usage_row = db.query(UsageMonthly).filter(
                UsageMonthly.tenant_id == tenant_id,
                UsageMonthly.year == target_year,
                UsageMonthly.month == target_month
            ).first()
            period = "month"

        if not usage_row:
            # No usage recorded for the period: report zeros.
            return UsageResponse(
                tenant_id=tenant_id,
                period=period,
                total_requests=0,
                total_tokens=0,
                total_cost_usd=0.0,
                start_date=start_date,
                end_date=end_date
            )

        return UsageResponse(
            tenant_id=tenant_id,
            period=period,
            total_requests=usage_row.total_requests,
            total_tokens=usage_row.total_tokens,
            total_cost_usd=usage_row.total_cost_usd,
            gemini_requests=usage_row.gemini_requests,
            openai_requests=usage_row.openai_requests,
            start_date=start_date,
            end_date=end_date
        )
    except Exception as e:
        logger.error(f"Error getting usage: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Closing the generator lets get_db run its own cleanup for the session.
        close_db = getattr(db_gen, "close", None)
        if close_db is not None:
            close_db()


@app.get("/billing/limits", response_model=PlanLimitsResponse)
async def get_limits(request: Request):
    """
    Get current plan limits and usage for the tenant.

    The tenant is taken from the auth context of the request. If the tenant
    has no plan record, the response falls back to the "starter" plan with a
    monthly limit of 500 chats.

    Args:
        request: Incoming HTTP request, used to resolve the auth context.

    Returns:
        PlanLimitsResponse with the plan name, the monthly chat limit, the
        request count for the current UTC month, and the remaining chats
        (None when the monthly limit is -1).

    Raises:
        HTTPException: 403 if the auth context has no tenant_id;
            500 if the lookup fails for any reason.
    """
    # Get tenant from auth
    auth_context = await get_auth_context(request)
    tenant_id = auth_context.get("tenant_id")

    if not tenant_id:
        raise HTTPException(status_code=403, detail="tenant_id required")

    # Keep a handle on the dependency generator: calling next(get_db()) on a
    # throwaway generator leaves the session's cleanup to garbage collection
    # instead of tying it to the end of this request.
    db_gen = get_db()
    db = next(db_gen)

    try:
        from app.billing.quota import get_tenant_plan, get_monthly_usage
        from datetime import datetime

        plan = get_tenant_plan(db, tenant_id)
        if not plan:
            # Default to starter
            plan_name = "starter"
            monthly_limit = 500
        else:
            plan_name = plan.plan_name
            monthly_limit = plan.monthly_chat_limit

        # Get current month usage
        now = datetime.utcnow()
        monthly_usage = get_monthly_usage(db, tenant_id, now.year, now.month)
        current_usage = monthly_usage.total_requests if monthly_usage else 0

        # A limit of -1 yields no "remaining" figure (presumably the
        # unlimited-plan sentinel - confirm against the plan definitions).
        # Otherwise clamp at zero so over-quota tenants never see a negative.
        remaining = None if monthly_limit == -1 else max(0, monthly_limit - current_usage)

        return PlanLimitsResponse(
            tenant_id=tenant_id,
            plan_name=plan_name,
            monthly_chat_limit=monthly_limit,
            current_month_usage=current_usage,
            remaining_chats=remaining
        )
    except Exception as e:
        logger.error(f"Error getting limits: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Run whatever teardown get_db defines after its yield, on both the
        # success and the error path.
        db_gen.close()


@app.post("/billing/plan")
async def set_plan(request_body: SetPlanRequest, http_request: Request):
    """
    Set tenant's subscription plan (admin only in production).

    In dev mode, allows any tenant to set their plan.
    In prod mode, should be restricted to admin users; currently prod only
    enforces that the authenticated tenant matches the tenant in the body.

    Args:
        request_body: Target tenant_id and the plan_name to assign.
        http_request: Incoming HTTP request, used to resolve the auth context.

    Returns:
        Dict with "success", "tenant_id", "plan_name" and
        "monthly_chat_limit" of the plan that was set.

    Raises:
        HTTPException: 403 in prod when the authenticated tenant differs from
            request_body.tenant_id; 400 when set_tenant_plan raises
            ValueError; 500 on any other failure.
    """
    # Get tenant from auth
    auth_context = await get_auth_context(http_request)
    auth_tenant_id = auth_context.get("tenant_id")

    # In prod, verify admin role (placeholder - implement actual admin check)
    if settings.ENV == "prod":
        # TODO: Add admin role check
        if auth_tenant_id != request_body.tenant_id:
            raise HTTPException(status_code=403, detail="Cannot set plan for other tenants")

    # Keep a handle on the dependency generator: calling next(get_db()) on a
    # throwaway generator leaves the session's cleanup to garbage collection
    # instead of tying it to the end of this request.
    db_gen = get_db()
    db = next(db_gen)

    try:
        from app.billing.quota import set_tenant_plan

        plan = set_tenant_plan(db, request_body.tenant_id, request_body.plan_name)

        return {
            "success": True,
            "tenant_id": request_body.tenant_id,
            "plan_name": plan.plan_name,
            "monthly_chat_limit": plan.monthly_chat_limit
        }
    except ValueError as e:
        # ValueError from set_tenant_plan is reported as a client error.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Error setting plan: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Run whatever teardown get_db defines after its yield, on both the
        # success and the error path.
        db_gen.close()


@app.get("/billing/cost-report", response_model=CostReportResponse)
async def get_cost_report(
    request: Request,
    range: str = "month",
    year: Optional[int] = None,
    month: Optional[int] = None
):
    """
    Get cost report with breakdown by provider and model.

    Args:
        request: Incoming HTTP request, used to resolve the auth context.
        range: "month" restricts the report to one calendar month; any other
            value reports over all of the tenant's usage events.
        year: Year to report on when range is "month"; defaults to the
            current UTC year.
        month: Month to report on when range is "month"; defaults to the
            current UTC month.

    Returns:
        CostReportResponse with total cost, request count and token count,
        plus per-provider and per-model breakdowns, each holding
        "requests", "tokens" and "cost_usd".

    Raises:
        HTTPException: 403 if the auth context has no tenant_id;
            500 if the query or aggregation fails for any reason.
    """
    # Get tenant from auth
    auth_context = await get_auth_context(request)
    tenant_id = auth_context.get("tenant_id")

    if not tenant_id:
        raise HTTPException(status_code=403, detail="tenant_id required")

    # Keep a handle on the dependency generator: calling next(get_db()) on a
    # throwaway generator leaves the session's cleanup to garbage collection
    # instead of tying it to the end of this request.
    db_gen = get_db()
    db = next(db_gen)

    try:
        from app.db.models import UsageEvent
        from datetime import datetime
        from sqlalchemy import func, and_

        now = datetime.utcnow()
        target_year = year or now.year
        target_month = month or now.month

        # Query usage events for the period
        if range == "month":
            query = db.query(UsageEvent).filter(
                and_(
                    UsageEvent.tenant_id == tenant_id,
                    func.extract('year', UsageEvent.request_timestamp) == target_year,
                    func.extract('month', UsageEvent.request_timestamp) == target_month
                )
            )
        else:  # all time
            query = db.query(UsageEvent).filter(UsageEvent.tenant_id == tenant_id)

        events = query.all()

        # Calculate totals
        total_cost = sum(e.estimated_cost_usd for e in events)
        total_requests = len(events)
        total_tokens = sum(e.total_tokens for e in events)

        # Build both breakdowns in a single pass over the events; each event
        # contributes to its provider's bucket and to its model's bucket.
        breakdown_by_provider = {}
        breakdown_by_model = {}
        for event in events:
            for breakdown, key in (
                (breakdown_by_provider, event.provider),
                (breakdown_by_model, event.model),
            ):
                bucket = breakdown.setdefault(
                    key,
                    {"requests": 0, "tokens": 0, "cost_usd": 0.0},
                )
                bucket["requests"] += 1
                bucket["tokens"] += event.total_tokens
                bucket["cost_usd"] += event.estimated_cost_usd

        return CostReportResponse(
            tenant_id=tenant_id,
            period=range,
            total_cost_usd=total_cost,
            total_requests=total_requests,
            total_tokens=total_tokens,
            breakdown_by_provider=breakdown_by_provider,
            breakdown_by_model=breakdown_by_model
        )
    except Exception as e:
        logger.error(f"Error getting cost report: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Run whatever teardown get_db defines after its yield, on both the
        # success and the error path.
        db_gen.close()


# Script entry point: serve `app` with uvicorn when this module is executed
# directly rather than imported.
if __name__ == "__main__":
    # Imported inside the guard so uvicorn is only needed when run as a script.
    import uvicorn
    # Listen on all network interfaces (0.0.0.0), port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)