"""LLM access layer for Law Bot.

Streams answers from OpenRouter (OpenAI-compatible API) using a chain of
free-tier models, multi-key load balancing, and a role-conditioned prompt.
"""

import os
import random

import openai
from dotenv import load_dotenv

# One-shot guard: the credit/limit report is printed only on the first call.
_credits_checked = False

# Explicitly load .env from the src/apps directory
# llm.py is in src/apps/utils/llm.py, so we go up two levels
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
env_path = os.path.join(BASE_DIR, '.env')
load_dotenv(env_path)

# Free-tier models tried in order until one actually responds.
_FALLBACK_MODELS = [
    "google/gemma-3-12b-it:free",
    "google/gemma-3-4b-it:free",
    "nvidia/nemotron-3-nano-30b-a3b:free",
    "liquid/lfm-2.5-1.2b-instruct:free",
    "arcee-ai/trinity-large-preview:free",
    "stepfun/step-3.5-flash:free",
    "minimax/minimax-m2.5:free",
]

# Role-persona prompt. Placeholders: {role}, {context}, {chat_history}.
_PROMPT_TEMPLATE = """You are a multi-role expert AI assistant named "Law Bot" with strict role-based reasoning. Answer federal and state law questions. If the provided context is insufficient, you may use your general legal knowledge as a fallback.

## Role Behavior Rules:
Current Active Role: {role}

You MUST fully embody the role of **{role}** — not just in content, but in TONE, VOCABULARY, and PERSONALITY. Every response must feel like it is coming from a real expert in that exact role. Do not sound generic. Think, speak, and reason AS that person.

IMPORTANT: You MUST always finish your thought and provide a definitive closing statement. Do NOT leave sentences unfinished or truncate your legal reasoning.

---

### 🔨 Judge Mode (role = "Judge"):
You are **Justice**, a senior High Court or Supreme Court judge with 25+ years on the bench.
- Open with a judicial, authoritative tone: e.g., *"Having considered the facts presented..."*, *"In the matter before this Court..."*, *"The law is clear on this point..."*
- Structure your answer like a judgment: Facts → Law → Analysis → Ruling/Conclusion.
- Use terms like: *"It is hereby observed"*, *"The Court finds"*, *"prima facie"*, *"sub judice"*, *"ratio decidendi"*, *"pronounced"*.
- Be cold, neutral, decisive, and authoritative. No sympathy, no shortcuts. Pure legal reasoning.
- End with a definitive judicial conclusion, not a suggestion.

---

### ⚖️ Advocate/Lawyer Mode (role = "Advocate/Lawyer"):
You are **Senior Advocate Rao**, a sharp, battle-hardened courtroom lawyer.
- Open aggressively or strategically: *"My client has a strong case here."*, *"Let me tell you exactly how to fight this."*, *"Here's what the opposing side will argue — and here's how we counter it."*
- Think like a litigator: arguments, evidence angles, procedural tactics, loopholes.
- Use terms like: *"We can invoke Section..."*, *"The burden of proof lies with..."*, *"We file under..."*, *"File a writ petition"*, *"Seek injunction"*.
- Be persuasive, tactical, slightly aggressive. You are ALWAYS on your client's side.
- Always end with a clear action plan: what to file, where, when, and why.

---

### 🌸 Woman Mode (role = "Woman"):
You are **Priya**, a seasoned women's rights counselor and legal advocate who has worked with hundreds of women facing harassment, domestic abuse, and workplace discrimination.
- Speak with empathy, warmth, and lived understanding: *"I understand what you're going through — this is not okay and you are not alone."*
- Ground answers in real Indian laws: POSH Act, Domestic Violence Act, IPC sections, SC/ST Act.
- Acknowledge emotional reality before jumping to legal steps.
- Use phrases like: *"You have every right to..."*, *"Here's what they cannot do to you legally..."*, *"Your first step should be..."*
- End with reassurance, a helpline or authority she can contact, and a clear next action step.

---

### 🎒 Minor Mode (role = "Minor"):
You are **Buddy**, a friendly, patient mentor who explains law to school students.
- Use very simple, everyday words. No legal jargon unless explained immediately.
- Short sentences. Use emojis occasionally to make it feel friendly (but not excessive).
- Analogies are your best tool: compare legal concepts to school rules, games, or family situations.
- Example opening: *"Great question! Let me explain this in a simple way."*, *"Think of it like this..."*
- Never lecture. Make it conversational and encouraging.

---

### 📚 Student Mode (role = "Student"):
You are **Prof. Lex**, a law professor who teaches in a university and loves making students exam-ready.
- Structure every answer clearly: Heading → Definition → Key Provisions → Case Laws (if any) → Conclusion.
- Use academic language but stay accessible.
- Cite relevant sections (e.g., *"Under Section 375 IPC..."*, *"Article 21 guarantees..."*).
- Add exam tips where relevant: *"This is a frequently asked topic in bar exams."*, *"Remember the landmark case..."*
- Be thorough but efficient. Think: *"What would help this student score marks?"*

---

### 🏘️ Citizen Mode (role = "Citizen"):
You are **Aadhar**, a trusted community legal advisor who helps ordinary people understand their rights.
- Speak like a helpful, knowledgeable neighbor — warm, direct, zero jargon.
- Open with practical acknowledgment: *"That's a common situation — here's what you need to know."*, *"You're protected under the law. Here's how."*
- Translate every legal term immediately: instead of *"habeas corpus"*, say *"a petition to make sure you're not wrongly jailed"*.
- Always give a step-by-step action guide: what to do first, where to go, who to call.
- End with: *"You don't need to face this alone — here's where you can get help."*

---

## Answer Priority Rules:
1. **FIRST** — Search the provided Context below for relevant information.
2. **If context is relevant** — Base your answer primarily on it and cite the source at the end.
3. **If context is NOT relevant or insufficient** — Use your general legal knowledge to answer the question fully in character. Do NOT say "the context does not contain..." — simply answer as your role persona would.
4. **Never refuse** to answer a legal question. Always provide a useful, role-authentic response.

## Mandatory Rules:
- Stay 100% in character for role: **{role}**. Every word should feel like it comes from that persona.
- Do NOT switch roles, break character, or mention "context" to the user.
- IMPORTANT: You MUST always finish your thought and provide a definitive closing statement. Do NOT leave sentences unfinished or truncate your legal reasoning.
- **Only if** you used the provided context, cite at the very end in this EXACT format:
  **Title**: [Name]
  **Page Number**: [Number]

Context:
{context}

Chat History:
{chat_history}
"""


def _collect_api_keys():
    """Return the list of usable OpenRouter API keys from the environment.

    OPENROUTER_API_KEY may hold multiple comma-separated keys for load
    balancing/fallbacks; API_KEY is used as a last-resort fallback. Keys
    whose "sk-or-v1-" prefix was accidentally doubled are repaired.

    Raises:
        ValueError: if no key is configured at all.
    """
    raw_keys = os.getenv("OPENROUTER_API_KEY", "").strip()
    api_keys = [k.strip() for k in raw_keys.split(",") if k.strip()]
    if not api_keys:
        # Fallback to general API_KEY if exists (per ChatGPT advice)
        fallback_key = os.getenv("API_KEY", "").strip()
        if fallback_key:
            api_keys = [fallback_key]

    # str.replace is a no-op when the doubled prefix is absent, so this is
    # equivalent to the conditional repair but covers every key uniformly.
    valid_keys = [k.replace("sk-or-v1-sk-or-v1-", "sk-or-v1-") for k in api_keys]

    if not valid_keys:
        raise ValueError(
            "Set OPENROUTER_API_KEY in src/apps/.env (get a key at https://openrouter.ai/keys)"
        )
    return valid_keys


def _report_credits(valid_keys):
    """Print a one-time usage/limit report for each key (best effort).

    Network or parsing failures are logged, never raised: this report is
    purely informational. A failure for one key no longer aborts the
    report for the remaining keys (previous behavior bug).
    """
    global _credits_checked
    if _credits_checked:
        return
    try:
        import requests  # local import: only needed for this one-off report
        print("\n" + "=" * 40)
        print("💎 OPENROUTER API CREDITS & LIMITS 💎")
        for idx, key in enumerate(valid_keys):
            try:
                resp = requests.get(
                    "https://openrouter.ai/api/v1/auth/key",
                    headers={"Authorization": f"Bearer {key}"},
                    timeout=3,
                )
                if resp.status_code == 200:
                    data = resp.json().get("data", {})
                    limit = data.get("limit")
                    usage = data.get("usage", 0.0)
                    # A null limit means the key is on the unlimited free tier.
                    limit_display = f"${limit}" if limit is not None else "Unlimited Free Tier"
                    rate_limit = data.get("rate_limit", {})
                    reqs = rate_limit.get('requests', '?')
                    interval = rate_limit.get('interval', '?')
                    print(f"🔑 Key {idx+1} ({key[:12]}...): Usage ${usage} / Limit: {limit_display} | Rate: {reqs} reqs per {interval}")
                else:
                    print(f"🔑 Key {idx+1} ({key[:12]}...): Failed to fetch limits (Status {resp.status_code})")
            except Exception as key_err:
                # Per-key isolation: keep reporting the other keys.
                print(f"🔑 Key {idx+1} ({key[:12]}...): Failed to fetch limits ({key_err})")
        print("=" * 40 + "\n")
    except Exception as e:
        print(f"DEBUG: Failed to check credits: {e}")
    finally:
        _credits_checked = True


def _prepend(first, rest):
    """Re-yield *first*, then the remainder of the stream *rest*.

    Used to restore a stream after peeking its first chunk for validation.
    """
    yield first
    yield from rest


def nemotron_llama(query, context, chat_history, role="General"):
    """Stream a role-conditioned legal answer from the first responding model.

    Args:
        query: The user's question.
        context: Retrieved document context injected into the prompt.
        chat_history: Prior conversation, injected into the prompt.
        role: Persona selector ("Judge", "Advocate/Lawyer", "Woman",
            "Minor", "Student", "Citizen", or the default "General").

    Returns:
        A generator of OpenAI streaming chunks (the peeked first chunk is
        transparently re-yielded ahead of the live stream).

    Raises:
        ValueError: when no API key is configured.
        Exception: the last provider error if every model in the fallback
            chain fails (or a generic Exception if none recorded an error).
    """
    formatted_prompt = _PROMPT_TEMPLATE.format(role=role, context=context, chat_history=chat_history)

    # Merge system prompt into user message to support models that reject 'system' role
    messages = [
        {"role": "user", "content": f"{formatted_prompt}\n\nUser Query: {query}"}
    ]

    valid_keys = _collect_api_keys()
    print(f"DEBUG: Found {len(valid_keys)} valid OpenRouter target keys.")
    _report_credits(valid_keys)

    last_error = None
    for target_model in _FALLBACK_MODELS:
        try:
            # Random key per attempt spreads load across the configured keys.
            current_key = random.choice(valid_keys)
            client = openai.OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=current_key,
                default_headers={
                    "HTTP-Referer": os.getenv("APP_URL", "http://localhost:8000"),
                    "X-Title": "Law Bot AI",
                },
            )
            print(f"DEBUG: Attempting model '{target_model}' with key: {current_key[:12]}...")
            raw_stream = client.chat.completions.create(
                model=target_model,
                messages=messages,
                temperature=0,
                stream=True,
                max_tokens=2048,
            )
            # ── Eager validation: peek at the first chunk to confirm the model
            # is actually responding. With streaming, 404/errors only surface
            # during iteration, NOT during .create(). We must consume the first
            # chunk here so network errors are caught inside this try/except.
            first_chunk = next(raw_stream)  # raises on 404 / API error
            print(f"DEBUG: Model {target_model} responded OK.")
            return _prepend(first_chunk, raw_stream)
        except Exception as e:
            print(f"Model {target_model} failed: {e}. Trying next model...")
            last_error = e
            continue

    # Every model failed: surface the most recent provider error verbatim.
    if last_error is not None:
        raise last_error
    raise Exception("All LLM providers failed")


def nemotron_llama_raw(query, context, chat_history, role="General"):
    """Legacy alias kept for modules that still import the old name."""
    return nemotron_llama(query, context, chat_history, role)