import json
import re
from typing import Dict, List

import google.generativeai as genai
from sentence_transformers import SentenceTransformer


class GeminiService:
    """Gemini-backed generation plus local sentence-transformers embeddings.

    Text generation (summaries, markdown, grounded chat) goes through the
    Gemini API; embeddings for RAG are computed locally so no embedding
    traffic leaves the process.
    """

    # Process-wide cache: each embedding model is loaded from disk only once,
    # shared by every GeminiService instance.
    _embedding_model_cache: dict[str, SentenceTransformer] = {}

    def __init__(self, api_key: str, model_name: str, embedding_model_name: str) -> None:
        """Configure the Gemini client and load (or reuse) the embedding model.

        Raises:
            ValueError: if ``api_key`` is empty/None.
        """
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set.")
        genai.configure(api_key=api_key)
        self._model = genai.GenerativeModel(model_name)
        self._embedding_model = self._get_embedding_model(embedding_model_name)

    def embed_text(self, text: str, task_type: str) -> List[float]:
        """Return a plain-float embedding vector for ``text``.

        Small local HF embeddings for RAG; ``task_type`` is accepted only to
        keep the signature compatible with API-style embedding clients.
        """
        _ = task_type
        emb = self._embedding_model.encode(text, normalize_embeddings=False)
        return emb.tolist()

    def summarize_multilingual(self, course_name: str, syllabus_text: str) -> Dict[str, str]:
        """Summarize a syllabus in four languages; returns keys en/mr/kn/hn."""
        # Cap the context at 12k chars to stay well inside the prompt budget.
        prompt = f"""
You are an academic assistant. Summarize the course syllabus content for course: {course_name}.
Return STRICT JSON with this exact schema and keys only:
{{
  "en": "English summary",
  "mr": "Marathi summary",
  "kn": "Kannada summary",
  "hn": "Hindi summary"
}}

Rules:
- Keep each summary clear for students and parents.
- 80-140 words per language.
- Use only the syllabus context below.

Syllabus context:
{syllabus_text[:12000]}
"""
        raw = self._model.generate_content(prompt).text
        return self._safe_parse_summary_json(raw)

    def generate_markdown(self, prompt: str) -> str:
        """Send ``prompt`` to Gemini and return the raw text reply."""
        return self._model.generate_content(prompt).text

    def chat_with_context(
        self,
        query: str,
        lang_code: str,
        history: List[dict],
        student_info: dict,
        rag_chunks: List[str],
    ) -> str:
        """Answer a student/parent query grounded in RAG chunks and student data.

        Args:
            query: the user's question.
            lang_code: response language code (en, hn, mr, kn).
            history: prior chat turns as dicts with 'role'/'content' keys
                (missing keys degrade to 'user'/'' rather than raising).
            student_info: attendance/result data, serialized into the prompt.
            rag_chunks: retrieved syllabus chunks; only the first 8 are used.

        Returns:
            Markdown answer text from the model.
        """
        history_text = "\n".join(
            [f"{msg.get('role', 'user')}: {msg.get('content', '')}" for msg in history]
        )
        # Limit grounding context to the top 8 chunks to bound prompt size.
        syllabus_context = "\n\n---\n\n".join(rag_chunks[:8])
        prompt = f"""
You are a helpful college assistant chatbot for students and parents.
Respond in language code: {lang_code}
Supported codes: en, hn, mr, kn.
Return the final answer in markdown.

Grounding rules:
- Prioritize facts from "Relevant syllabus context" for syllabus/unit/module questions.
- If user asks for units/modules/topics of a course and context includes them, list them clearly.
- Do not say data is missing unless the relevant syllabus context truly does not contain it.

Relevant syllabus context:
{syllabus_context}

Student data (attendance, result etc.):
{json.dumps(student_info, ensure_ascii=False)}

Recent chat history:
{history_text}

User query: {query}

Answer guidelines:
- Be accurate and grounded in provided info.
- If data is missing, state what is missing.
- Keep response practical and concise.
- Use markdown with bullets or short headings when useful.
"""
        return self._model.generate_content(prompt).text

    def _safe_parse_summary_json(self, raw: str) -> Dict[str, str]:
        """Parse the model's summary reply into the four expected keys.

        Tolerates markdown code fences and stray prose around the JSON.
        Never raises on malformed output: if no valid JSON object can be
        recovered, every language key degrades to an empty string (the
        previous implementation raised ``json.JSONDecodeError`` despite
        its "safe" name).
        """
        text = raw.strip()
        if text.startswith("```"):
            # Strip a markdown fence such as ```json ... ```
            text = text.strip("`")
            if text.startswith("json"):
                text = text[4:].strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Fallback: pull the first {...} span out of any surrounding prose.
            match = re.search(r"\{.*\}", text, re.DOTALL)
            parsed = None
            if match is not None:
                try:
                    parsed = json.loads(match.group(0))
                except json.JSONDecodeError:
                    parsed = None
        if not isinstance(parsed, dict):
            # Non-dict top level (list, string, None) carries no usable keys.
            parsed = {}
        return {
            "en": str(parsed.get("en", "")),
            "mr": str(parsed.get("mr", "")),
            "kn": str(parsed.get("kn", "")),
            "hn": str(parsed.get("hn", "")),
        }

    @classmethod
    def _get_embedding_model(cls, embedding_model_name: str) -> SentenceTransformer:
        """Return the cached SentenceTransformer, loading it on first use."""
        if embedding_model_name not in cls._embedding_model_cache:
            cls._embedding_model_cache[embedding_model_name] = SentenceTransformer(
                embedding_model_name
            )
        return cls._embedding_model_cache[embedding_model_name]

    @classmethod
    def preload_embedding_model(cls, embedding_model_name: str) -> None:
        """Load and warm up the embedding model ahead of the first real query."""
        model = cls._get_embedding_model(embedding_model_name)
        # Warm up once so the first real query does not pay model initialization cost.
        model.encode("embedding warmup", normalize_embeddings=False)