import os
import subprocess
import sys

# Workaround: install the prebuilt llama-cpp-python package at startup (from
# the CPU wheel index below) to sidestep problems building it from source.
print("جاري تسطيب المكتبة الخفيفة...")
try:
    import llama_cpp
except ImportError:
    import importlib

    subprocess.check_call([
        sys.executable, "-m", "pip", "install", "llama-cpp-python",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
    ])
    # The package was installed by a child process after this interpreter
    # started. Reset the import system's finder caches so the later
    # `from llama_cpp import Llama` is guaranteed to see the new module.
    importlib.invalidate_caches()

import json
import faiss
import gradio as gr
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_download
from llama_cpp import Llama 
from groq import Groq

# 1. Groq client setup
# The key is read from the environment; it is None when GROQ_API_KEY is unset.
groq_api_key = os.getenv("GROQ_API_KEY")
groq_client = Groq(api_key=groq_api_key)

# 2. RAG store setup: load the corpus, embed every document, index the vectors
print("جاري إعداد قاعدة بيانات الـ RAG...")
with open("rag_corpus_auto.json", encoding="utf-8") as corpus_file:
    # Entries are later joined with str.join in plant_doctor_bot, so the file
    # is expected to hold a JSON list of strings.
    rag_documents = json.load(corpus_file)

embedder = SentenceTransformer('intfloat/multilingual-e5-small')
# NOTE(review): the E5 model card appears to recommend "passage: " / "query: "
# prefixes on inputs; texts are encoded without them here and in
# plant_doctor_bot. Confirm before changing - the 0.88 similarity threshold
# depends on the current embeddings.
embeddings = embedder.encode(rag_documents, normalize_embeddings=True)

# Inner-product index over normalised embeddings; its width is the embedding
# dimension reported by the encoded matrix.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(embeddings)

# 3. Model download
print("جاري تحميل الموديل من حسابك...")
model_path = hf_hub_download(
    filename="Qwen2.5-7B-Instruct.Q4_K_M.gguf",
    repo_id="zed344/plant-disease-bot",
)

# Start the model with settings chosen for speed.
llama_options = {
    "model_path": model_path,
    # Context window kept very small to save RAM and improve speed.
    # NOTE(review): the prompt built in plant_doctor_bot plus max_tokens=300
    # has to fit in this window - confirm it is large enough for 3 RAG chunks.
    "n_ctx": 512,
    # Thread count, set high to use as much of the CPU as possible.
    "n_threads": 8,
    "verbose": False,
}
llm = Llama(**llama_options)

# Alpaca-style prompt template with two positional `{}` slots: the first takes
# the instruction, the second the input; the model's answer follows the
# trailing "### Response:" header.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input "
    "that provides further context. Write a response that appropriately "
    "completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
)

# Built once at import time: the converter is called on every streamed chunk,
# so rebuilding the table per call would be repeated, identical work.
_ARABIC_INDIC_DIGITS = str.maketrans('0123456789', '٠١٢٣٤٥٦٧٨٩')


def convert_to_arabic_numerals(text: str) -> str:
    """Return *text* with ASCII digits 0-9 replaced by Arabic-Indic digits.

    Every other character is left unchanged.
    """
    return text.translate(_ARABIC_INDIC_DIGITS)

# 4. Request handler with streaming (incremental replies)
def plant_doctor_bot(user_query, history):
    """Answer a plant-health question, streaming the reply as it is produced.

    The query is embedded and searched against the FAISS index. When the best
    hit scores at least 0.88, the retrieved documents are given as context to
    the local llama.cpp model; otherwise the question is sent to Groq.

    Args:
        user_query: The user's message text.
        history: Conversation history passed by the chat UI; not used.

    Yields:
        The reply accumulated so far (not just the newest piece), with ASCII
        digits converted by ``convert_to_arabic_numerals``.
    """
    query_emb = embedder.encode([user_query], normalize_embeddings=True)
    scores, indices = index.search(query_emb, k=3)

    # The answer is covered by the RAG corpus: use the local model.
    if scores[0][0] >= 0.88:
        # FAISS reports -1 for missing neighbours when the index holds fewer
        # than k vectors; drop those so they cannot wrap around to
        # rag_documents[-1] and inject an unrelated document.
        context_chunks = [rag_documents[i] for i in indices[0] if i >= 0]
        context = "\n---\n".join(context_chunks)

        prompt = alpaca_prompt.format(
            "أنت مهندس زراعي خبير. أجب باختصار ومباشرة بناءً على المعلومات الموثوقة فقط. لا تستخدم إيموجي.",
            f"المعلومات الموثوقة:\n{context}\n\nالسؤال: {user_query}"
        )

        # Stream tokens from the local model.
        response_generator = llm(
            prompt,
            max_tokens=300,
            # Stop if the model starts emitting another prompt section.
            stop=["### Instruction:", "### Input:"],
            temperature=0.0,
            stream=True
        )

        full_text = ""
        for chunk in response_generator:
            full_text += chunk["choices"][0]["text"]
            # Send the text to the UI progressively.
            yield convert_to_arabic_numerals(full_text)

    else:
        # The corpus has no close match: fall back to Groq, streamed as well.
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": "أنت مهندس زراعي خبير. أجب بالعربية الفصحى وبشكل مباشر وبدون رموز تعبيرية."},
                {"role": "user", "content": user_query}
            ],
            model="llama-3.3-70b-versatile",
            temperature=0.0,
            stream=True
        )

        full_text = ""
        for chunk in chat_completion:
            # Defensive: skip chunks that carry no choices (avoids IndexError).
            if not chunk.choices:
                continue
            delta_text = chunk.choices[0].delta.content
            if delta_text:
                full_text += delta_text
                yield convert_to_arabic_numerals(full_text)

# 5. User interface (Gradio 6.0)
# Right-to-left layout and right-aligned text for the whole app container.
custom_css = """
.gradio-container { direction: rtl !important; text-align: right !important; font-family: 'Tahoma', sans-serif; }
"""

# Centered page heading rendered above the chat widget.
page_title_html = "<h1 style='text-align: center;'>🌿 طبيب النبات الذكي (نسخة السرعة القصوى)</h1>"

with gr.Blocks() as demo:
    gr.Markdown(page_title_html)
    # plant_doctor_bot is a generator, so replies appear incrementally.
    gr.ChatInterface(plant_doctor_bot)

if __name__ == "__main__":
    # The stylesheet is supplied at launch time rather than to gr.Blocks().
    demo.launch(css=custom_css)