import os
from pathlib import Path
from typing import Any, Dict, Optional

import httpx


class AssetAnalyzer:
    """Service to analyze uploaded assets using an OCR API and extract content."""

    # Extension groups used to decide whether a file is worth sending to OCR.
    _DOCUMENT_EXTENSIONS = frozenset({'.pdf', '.doc', '.docx', '.txt', '.rtf'})
    _IMAGE_EXTENSIONS = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'})
    _VIDEO_EXTENSIONS = frozenset({'.mp4', '.avi', '.mov', '.wmv', '.flv'})

    # MIME types for the multipart upload. Kept consistent with the extension
    # groups above so every OCR-eligible extension has an explicit type instead
    # of silently falling back to application/octet-stream.
    _CONTENT_TYPES = {
        '.pdf': 'application/pdf',
        '.doc': 'application/msword',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.txt': 'text/plain',
        '.rtf': 'application/rtf',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
        '.svg': 'image/svg+xml',
    }

    def __init__(self):
        # Base URL and optional API key come from the environment so deployments
        # can point at a different OCR service without code changes.
        self.ocr_api_url = os.getenv("OCR_API_URL", "https://seth0330-ezofisocr.hf.space")
        self.ocr_api_key = os.getenv("OCR_API_KEY", "")

    async def analyze_document(self, file_path: str, key_fields: Optional[str] = None) -> Dict[str, Any]:
        """
        Analyze a document using the OCR API.

        Args:
            file_path: Path to the file to analyze
            key_fields: Optional comma-separated string of key fields to extract

        Returns:
            Dictionary containing extracted content and metadata. Always has a
            "success" key; on failure an "error" key describes the problem and
            "extracted_content" is None.
        """
        try:
            file_path_obj = Path(file_path)
            if not file_path_obj.exists():
                return {
                    "success": False,
                    "error": "File not found",
                    "extracted_content": None
                }

            # Only documents and images are suitable for OCR; everything else
            # is skipped without error.
            file_type = self._get_file_type(file_path)
            if file_type not in ("document", "image"):
                return {
                    "success": True,
                    "extracted_content": None,
                    "message": f"File type {file_type} not suitable for OCR analysis"
                }

            data: Dict[str, str] = {}
            if key_fields:
                data['key_fields'] = key_fields

            headers: Dict[str, str] = {}
            if self.ocr_api_key:
                headers["X-API-Key"] = self.ocr_api_key

            # The file handle must remain open while httpx streams the upload,
            # so the request is issued inside the `with open(...)` block.
            with open(file_path, 'rb') as f:
                files = {'file': (file_path_obj.name, f, self._get_content_type(file_path))}
                async with httpx.AsyncClient(timeout=60.0) as client:
                    response = await client.post(
                        f"{self.ocr_api_url}/api/extract",
                        headers=headers,
                        files=files,
                        data=data
                    )

            if response.status_code == 200:
                return {
                    "success": True,
                    "extracted_content": response.json(),
                    "message": "Document analyzed successfully"
                }
            return {
                "success": False,
                "error": f"OCR API returned status {response.status_code}: {response.text}",
                "extracted_content": None
            }
        except Exception as e:
            # Boundary handler: callers get a uniform error envelope instead of
            # an exception (network failures, unreadable files, bad JSON, ...).
            return {
                "success": False,
                "error": str(e),
                "extracted_content": None
            }

    async def analyze_image(self, file_path: str) -> Dict[str, Any]:
        """
        Analyze an image using GPT-4 Vision (for screenshots, infographics, etc.).

        This is a placeholder for future implementation.

        Args:
            file_path: Path to the image file

        Returns:
            Dictionary containing image analysis (currently a static placeholder).
        """
        # TODO: Implement GPT-4 Vision analysis for images
        # For now, return a placeholder
        return {
            "success": True,
            "extracted_content": {
                "type": "image",
                "message": "Image analysis not yet implemented"
            },
            "message": "Image analysis placeholder"
        }

    def _get_file_type(self, file_path: str) -> str:
        """Classify a file as "document", "image", "video", or "unknown" by extension."""
        ext = Path(file_path).suffix.lower()
        if ext in self._DOCUMENT_EXTENSIONS:
            return "document"
        if ext in self._IMAGE_EXTENSIONS:
            return "image"
        if ext in self._VIDEO_EXTENSIONS:
            return "video"
        return "unknown"

    def _get_content_type(self, file_path: str) -> str:
        """Return the MIME type for a file, defaulting to application/octet-stream."""
        ext = Path(file_path).suffix.lower()
        return self._CONTENT_TYPES.get(ext, 'application/octet-stream')

    def extract_key_insights(self, extracted_content: Dict[str, Any]) -> str:
        """
        Extract key insights from OCR results to use as context for AI content generation.

        Args:
            extracted_content: The JSON response from the OCR API

        Returns:
            Formatted string with key insights, one per line; an empty string if
            there is nothing to extract.
        """
        if not extracted_content:
            return ""

        insights = []

        if isinstance(extracted_content, dict):
            # Surface structured fields, skipping bookkeeping keys.
            for key, value in extracted_content.items():
                if value and key not in ('raw_text', 'confidence', 'metadata'):
                    if isinstance(value, (str, int, float)):
                        insights.append(f"{key}: {value}")
                    elif isinstance(value, list) and len(value) > 0:
                        # Only the first few items — this is context, not a dump.
                        insights.append(f"{key}: {', '.join(map(str, value[:5]))}")

            # Raw text handling stays inside the dict guard: membership tests on
            # arbitrary non-dict payloads could raise TypeError.
            if 'raw_text' in extracted_content:
                raw_text = extracted_content['raw_text']
                if isinstance(raw_text, str) and len(raw_text) > 0:
                    # Truncate long text so the context stays compact.
                    if len(raw_text) > 500:
                        insights.append(f"Document content: {raw_text[:500]}...")
                    else:
                        insights.append(f"Document content: {raw_text}")

        return "\n".join(insights) if insights else "No specific insights extracted"