from deep_translator import GoogleTranslator
import deepl
from openai import OpenAI
from typing import List, Union, Optional
import base64
import io
import json
from PIL import Image

class TranslatorService:
    def __init__(self, source: str = 'en', target: str = 'de', service_type: str = 'google', api_key: Optional[str] = None):
        """
        Initializes the Translator Service.
        
        Args:
            source: Source language code (default: 'en').
            target: Target language code (default: 'de').
            service_type: 'google', 'deepl', 'openai', or 'xai'.
            api_key: API Key for DeepL, OpenAI or xAI.
        """
        self.service_type = service_type
        self.api_key = api_key
        self.target = target
        self.source = source
        self.usage = {'input_tokens': 0, 'output_tokens': 0}
        
        if self.service_type == 'deepl':
            print("Using DeepL Translator")
            if not self.api_key:
                raise ValueError("DeepL API Key is required for DeepL service.")
            self.translator = deepl.Translator(self.api_key)
            
        elif self.service_type == 'openai':
            print("Using OpenAI (GPT-4o-mini) Translator")
            if not self.api_key:
                raise ValueError("OpenAI API Key is required for OpenAI service.")
            self.client = OpenAI(api_key=self.api_key)

        elif self.service_type == 'xai':
            print("Using xAI Grok Translator")
            if not self.api_key:
                raise ValueError("xAI API Key is required for Grok service.")
            # xAI API is OpenAI-compatible
            self.client = OpenAI(api_key=self.api_key, base_url="https://api.x.ai/v1")
            
        else:
            print("Using Google Translator (deep-translator)")
            self.translator = GoogleTranslator(source=source, target=target)

    def get_usage_stats(self):
        """Returns accumulated token usage."""
        return self.usage

    def get_cost_estimate(self):
        """
        Returns estimated cost in USD based on GPT-4o-mini pricing.
        Input: $0.15 / 1M tokens
        Output: $0.60 / 1M tokens
        """
        input_cost = (self.usage['input_tokens'] / 1_000_000) * 0.15
        output_cost = (self.usage['output_tokens'] / 1_000_000) * 0.60
        return input_cost + output_cost

    def validate_api_key(self) -> None:
        """Performs a lightweight test call to validate the configured API key.

        Raises:
            Exception: If the key is invalid or the provider returns an auth error.
        """
        # Google (deep-translator) does not use an API key
        if self.service_type not in ['deepl', 'openai', 'xai']:
            return

        if self.service_type == 'deepl':
            # Minimal ping using the official client
            try:
                # This will raise an exception on invalid auth
                _ = self.translator.get_usage()
            except Exception as e:
                raise Exception(f"DeepL API key seems invalid or not authorized: {e}")
            return

        # OpenAI / xAI
        try:
            model = "gpt-4o-mini" if self.service_type == 'openai' else "grok-4-mini"
            # Very small test prompt to minimize cost
            response = self.client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "user", "content": "test"}
                ],
                max_tokens=1,
                temperature=0.0,
            )
            # If we get here without exception, we assume the key works.
            if response.usage:
                self.usage['input_tokens'] += response.usage.prompt_tokens
                self.usage['output_tokens'] += response.usage.completion_tokens
        except Exception as e:
            raise Exception(f"{self.service_type.capitalize()} API key seems invalid or the service is not reachable: {e}")

    def translate_image_with_vision(self, image: Image.Image) -> List[dict]:
        """
        Uses VLM (Vision Language Model) to detect and translate text directly from image.
        Returns list of dicts: {'bbox': [x1, y1, x2, y2], 'original': str, 'translated': str}
        """
        if self.service_type not in ['openai', 'xai']:
             raise ValueError("Vision features only supported for OpenAI and xAI services.")

        # 1. Letterbox the image to be square (helps with coordinate accuracy)
        old_width, old_height = image.size
        new_size = max(old_width, old_height)
        square_img = Image.new("RGB", (new_size, new_size), (255, 255, 255))
        
        # Paste original image centered or top-left? Top-left is easier for coord math.
        square_img.paste(image, (0, 0))
        
        # Convert to base64
        buffered = io.BytesIO()
        square_img.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        img_url = f"data:image/jpeg;base64,{img_str}"
        
        model = "gpt-4o-mini" if self.service_type == 'openai' else "grok-4-latest"
        
        prompt = f"""
        You are a Manga Translator Agent. 
        Look at this manga page. Identify all speech bubbles and text boxes.
        For each text region:
        1. Extract the English text.
        2. Translate it to German.
        3. Estimate the bounding box as [ymin, xmin, ymax, xmax] using a 0-1000 normalized scale based on this square image.
           - (0,0) is top-left corner.
           - (1000,1000) is bottom-right corner.
           - Be extremely precise with the coordinates.
           - The image might have white padding on the right or bottom, ignore that area.
        
        Return ONLY a valid JSON array with this structure:
        [
            {{
                "original": "English text",
                "translated": "German translation",
                "bbox": [ymin, xmin, ymax, xmax]
            }}
        ]
        Do not use markdown code blocks. Return raw JSON only.
        """

        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {"url": img_url}
                            }
                        ],
                    }
                ],
                max_tokens=2000,
                temperature=0.1
            )
            
            # Track usage
            if response.usage:
                self.usage['input_tokens'] += response.usage.prompt_tokens
                self.usage['output_tokens'] += response.usage.completion_tokens
            
            content = response.choices[0].message.content.strip()
            # Cleanup markdown if present
            if content.startswith("```json"):
                content = content[7:]
            if content.endswith("```"):
                content = content[:-3]
                
            data = json.loads(content.strip())
            
            results = []
            for item in data:
                ymin, xmin, ymax, xmax = item['bbox']
                
                # Clamp values 0-1000
                ymin = max(0, min(1000, ymin))
                xmin = max(0, min(1000, xmin))
                ymax = max(0, min(1000, ymax))
                xmax = max(0, min(1000, xmax))
                
                # Convert from 0-1000 scale relative to the SQUARE image
                abs_x_min = int((xmin / 1000) * new_size)
                abs_y_min = int((ymin / 1000) * new_size)
                abs_x_max = int((xmax / 1000) * new_size)
                abs_y_max = int((ymax / 1000) * new_size)
                
                # Clip to original image dimensions (remove padding area results)
                abs_x_min = min(abs_x_min, old_width)
                abs_y_min = min(abs_y_min, old_height)
                abs_x_max = min(abs_x_max, old_width)
                abs_y_max = min(abs_y_max, old_height)
                
                # Ensure valid box
                if abs_x_max > abs_x_min and abs_y_max > abs_y_min:
                    bbox_points = [
                        [abs_x_min, abs_y_min], # Top-Left
                        [abs_x_max, abs_y_min], # Top-Right
                        [abs_x_max, abs_y_max], # Bottom-Right
                        [abs_x_min, abs_y_max]  # Bottom-Left
                    ]
                    
                    results.append({
                        'bbox': bbox_points,
                        'original': item.get('original', ''),
                        'translated': item.get('translated', '')
                    })
            
            return results
            
        except Exception as e:
            print(f"Vision translation error: {e}")
            return []

    def translate_text(self, text: str) -> str:
        """
        Translates a single string.
        """
        if not text.strip():
            return ""
            
        try:
            if self.service_type == 'deepl':
                # DeepL uses slightly different language codes (e.g. 'DE' instead of 'de' usually, but 'de' works)
                result = self.translator.translate_text(text, source_lang=None, target_lang=self.target)
                return result.text
                
            elif self.service_type in ['openai', 'xai']:
                # Select model based on service
                model = "gpt-4o-mini" if self.service_type == 'openai' else "grok-4-latest"
                
                response = self.client.chat.completions.create(
                    model=model,
                    messages=[
                        {"role": "system", "content": f"You are a professional manga translator. Translate the following text from {self.source} to {self.target}. Keep the translation natural and fitting for a comic/manga context. Ensure correct handling of German special characters like ä, ö, ü, ß. Only return the translated text, nothing else."},
                        {"role": "user", "content": text}
                    ],
                    temperature=0.3
                )
                
                # Track usage
                if response.usage:
                    self.usage['input_tokens'] += response.usage.prompt_tokens
                    self.usage['output_tokens'] += response.usage.completion_tokens

                return response.choices[0].message.content.strip()
                
            else:
                return self.translator.translate(text)
        except Exception as e:
            print(f"Translation error: {e}")
            return text

    def translate_batch(self, texts: List[str]) -> List[str]:
        """
        Translates a list of strings.
        """
        if not texts:
            return []

        try:
            if self.service_type == 'deepl':
                results = self.translator.translate_text(texts, source_lang=None, target_lang=self.target)
                return [r.text for r in results]
                
            elif self.service_type in ['openai', 'xai']:
                # Select model based on service
                model = "gpt-4o-mini" if self.service_type == 'openai' else "grok-4-latest"

                # OpenAI/xAI batch approach
                formatted_text = "\n".join([f"{i+1}. {t}" for i, t in enumerate(texts)])
                prompt = f"Translate the following numbered lines from {self.source} to {self.target}. Return them as a numbered list with the same indices.\n\n{formatted_text}"
                
                response = self.client.chat.completions.create(
                    model=model,
                    messages=[
                         {"role": "system", "content": f"You are a professional manga translator. Translate the text from {self.source} to {self.target}. Return ONLY the numbered list of translations."},
                         {"role": "user", "content": prompt}
                    ],
                    temperature=0.3
                )
                
                # Track usage
                if response.usage:
                    self.usage['input_tokens'] += response.usage.prompt_tokens
                    self.usage['output_tokens'] += response.usage.completion_tokens

                content = response.choices[0].message.content.strip()
                
                # Parse results back to list
                translated_lines = []
                # Simple parsing (robustness could be improved)
                for line in content.split('\n'):
                    if '. ' in line:
                        parts = line.split('. ', 1)
                        if len(parts) > 1:
                            translated_lines.append(parts[1])
                        else:
                             translated_lines.append(line)
                    else:
                         translated_lines.append(line)
                         
                # Fallback if counts don't match (rare but possible)
                if len(translated_lines) != len(texts):
                     return [self.translate_text(t) for t in texts]
                     
                return translated_lines
                
            else:
                return self.translator.translate_batch(texts)
        except Exception as e:
            print(f"Batch translation error: {e}")
            return texts