import os
from typing import List, Tuple

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables from .env if it exists
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
SYSTEM_PROMPT = os.getenv(
    "HF_SYSTEM_PROMPT",
    "You are a concise and helpful AI assistant.",
)

# HF_TOKEN is deliberately not required at import time so that the UI can
# still come up on Hugging Face Spaces. If the token is missing, `respond`
# surfaces a clear guidance message instead.

# No global client is created, since the model can be selected dynamically;
# a client is created per call instead.

# Small, cloud-friendly model suggestions
RECOMMENDED_MODELS = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]


def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
    """Flatten the chat history into a plain-text prompt."""
    conversation = [f"System: {SYSTEM_PROMPT}"]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append(f"User: {user_msg}")
        if assistant_msg:
            conversation.append(f"Assistant: {assistant_msg}")
    conversation.append(f"User: {message}")
    conversation.append("Assistant:")
    return "\n".join(conversation)


def respond(
    message: str,
    history: List[Tuple[str, str]],
    model_id: str = HF_MODEL_ID,
    temperature: float = 0.7,
    max_new_tokens: int = 512,
):
    # If no token or endpoint is configured, guide the user from the UI.
    if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN is not set. On a Hugging Face Space, add the 'HF_TOKEN' "
            "secret under Settings > Secrets (or provide an Inference Endpoint URL)."
        )
        return

    prompt = format_prompt(message, history)

    try:
        # Create the client per request to honor the selected model or endpoint.
        # The `model` argument also accepts the URL of a deployed Inference
        # Endpoint (InferenceClient has no `endpoint` keyword).
        if HF_ENDPOINT_URL:
            local_client = InferenceClient(model=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)

        # Try streaming first
        accumulated = ""
        try:
            stream = local_client.text_generation(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                stream=True,
                details=False,
                return_full_text=False,
            )
            for chunk in stream:
                token_text = None
                # Newer huggingface_hub versions may return objects with .token.text
                if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
                    token_text = chunk.token.text
                # Fallback for dict responses
                if token_text is None and isinstance(chunk, dict):
                    token = chunk.get("token") or {}
                    token_text = token.get("text") or chunk.get("generated_text")
                # Fallback if a raw string is ever yielded
                if token_text is None and isinstance(chunk, str):
                    token_text = chunk
                if token_text:
                    accumulated += token_text
                    yield accumulated
        except StopIteration:
            # Some servers may raise StopIteration prematurely; fall back to non-streaming.
            pass
        except Exception as stream_err:
            # Log and fall back to non-streaming
            print(f"[HF STREAM ERROR] {stream_err}")

        # Fallback: if nothing was streamed, try a single-shot generation
        if not accumulated.strip():
            try:
                result = local_client.text_generation(
                    prompt=prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=0.95,
                    stream=False,
                    details=False,
                    return_full_text=False,
                )
                if isinstance(result, dict):
                    text = result.get("generated_text", "")
                else:
                    text = str(result)
                yield text if text.strip() else "No response was received from the model."
            except Exception as nonstream_err:
                # Surface a detailed error to the UI instead of a vague message
                err_text = str(nonstream_err).strip()
                response_text = ""
                if hasattr(nonstream_err, "response"):
                    response = getattr(nonstream_err, "response")
                    response_text = getattr(response, "text", "") or ""
                if response_text and response_text not in err_text:
                    err_text = f"{err_text} | {response_text}".strip(" |")
                if not err_text:
                    err_text = repr(nonstream_err)
                print(f"[HF NON-STREAM ERROR] {err_text}")
                yield f"An error occurred: {err_text}"
    except StopIteration:
        print("[HF API ERROR] StopIteration: no data was received while reading the API response.")
        yield "An error occurred: no response was received from the API (StopIteration)."
    except Exception as err:  # pragma: no cover - surface errors to the UI
        err_text = str(err).strip()
        response_text = ""
        if hasattr(err, "response"):
            response = getattr(err, "response")
            response_text = getattr(response, "text", "") or ""
        if response_text and response_text not in err_text:
            err_text = f"{err_text} | {response_text}".strip(" |")
        if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "The selected model does not appear to be accessible. Try updating "
                "`HF_MODEL_ID` in `.env` to a Hugging Face chat model that is "
                "enabled for your account."
            )
            return
        if not err_text:
            err_text = repr(err)
        print(f"[HF API ERROR] {err_text}")
        yield f"An error occurred: {err_text}"


demo = gr.ChatInterface(
    respond,
    title="Gradio HF Agent",
    description=(
        "A simple chat interface that talks to the Hugging Face Inference API. "
        "You can change the model and generation settings below."
    ),
    theme="soft",
    additional_inputs=[
        gr.Dropdown(
            label="Model ID",
            info="Hugging Face model repository name",
            choices=RECOMMENDED_MODELS,
            value=HF_MODEL_ID,
            allow_custom_value=True,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.0,
            value=0.7,
            step=0.05,
        ),
        gr.Slider(
            label="Maximum new tokens",
            minimum=16,
            maximum=1024,
            value=512,
            step=16,
        ),
    ],
)


if __name__ == "__main__":
    demo.queue().launch()
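
# A minimal sketch of the .env file this script reads via python-dotenv (all
# values below are placeholders, not real defaults; either HF_TOKEN or
# HF_ENDPOINT_URL must be set for generation to work):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   HF_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct
#   HF_ENDPOINT_URL=https://<your-endpoint>.endpoints.huggingface.cloud
#   HF_SYSTEM_PROMPT=You are a concise and helpful AI assistant.
#
# To smoke-test `respond` without launching the UI, iterate the generator
# directly; it yields progressively accumulated text:
#
#   for partial in respond("Hello!", history=[]):
#       print(partial)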