"""RAG FAQ chatbot for Jupiter Money.

Pipeline: download a GGUF LLaMA-3 model (if missing) -> load Q/A pairs from
jupiter_faqs.csv -> embed questions with sentence-transformers -> index them
in a persistent ChromaDB collection -> answer user queries with llama.cpp,
served through a Gradio ChatInterface.
"""

import re
import string
import uuid
from pathlib import Path

import chromadb
import gradio as gr
import pandas as pd
import requests
from chromadb.config import Settings
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer

# ---------------------- Download Model ----------------------
# Alternative (smaller) model, kept for quick swapping:
# MODEL_URL = "https://huggingface.co/tensorblock/Meta-Llama-3.2-1B-GGUF/resolve/main/Meta-Llama-3.2-1B-Q8_0.gguf"
# MODEL_PATH = "Meta-Llama-3.2-1B-Q8_0.gguf"
MODEL_URL = (
    "https://huggingface.co/datasets/psy7743/llama3-8b-instruct-Q8_0.gguf"
    "/resolve/main/llama3-8b-instruct-Q8_0.gguf"
)
MODEL_PATH = "llama3-8b-instruct-Q8_0.gguf"

if not Path(MODEL_PATH).exists():
    print("📥 Downloading LLaMA 3 8B Q8_0 model...")
    # Stream to disk in chunks; fail loudly on HTTP errors instead of
    # silently writing an HTML error page into the .gguf file.
    with requests.get(MODEL_URL, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    print("✅ Download complete!")

# ---------------------- Load Data ----------------------
df = pd.read_csv("jupiter_faqs.csv")


def clean_text(text):
    """Lowercase *text*, strip punctuation, and collapse all whitespace runs
    (including newlines/tabs) to single spaces."""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.replace('\n', ' ').replace('\t', ' ')
    return re.sub(r'\s+', ' ', text).strip()


df['clean_question'] = df['question'].apply(clean_text)
df['clean_answer'] = df['answer'].apply(clean_text)
df['document'] = df.apply(
    lambda row: f"Question: {row['clean_question']}\nAnswer: {row['clean_answer']}",
    axis=1,
)

# ---------------------- Embeddings ----------------------
embedding_model = SentenceTransformer('all-mpnet-base-v2')
# Embed the questions only: retrieval matches the user query against FAQ
# questions, while the stored document carries both question and answer.
df['embedding'] = df['clean_question'].apply(
    lambda x: embedding_model.encode(x).tolist()
)
df['uid'] = [str(uuid.uuid4()) for _ in range(len(df))]

# ---------------------- ChromaDB ----------------------
persist_dir = "chroma_qa_db"
chroma_client = chromadb.PersistentClient(path=persist_dir, settings=Settings())
collection_name = "qa_collection"

# Rebuild the collection from scratch on every run so stale rows never linger
# after the CSV changes; the emptiness check below then always triggers a load.
if collection_name in [c.name for c in chroma_client.list_collections()]:
    chroma_client.delete_collection(name=collection_name)
collection = chroma_client.get_or_create_collection(name=collection_name)

if len(collection.get()["ids"]) == 0:
    collection.add(
        documents=df['document'].tolist(),
        embeddings=df['embedding'].tolist(),
        ids=df['uid'].astype(str).tolist(),
    )

# ---------------------- LLaMA ----------------------
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=8,
    n_gpu_layers=-1,  # offload all layers to GPU when one is available
)


# ---------------------- Inference ----------------------
def search_chroma(query, n_results=5):
    """Return the *n_results* FAQ documents most similar to *query*.

    Embeds the raw query with the same model used at index time and returns
    the document strings for the single query (hence ``["documents"][0]``).
    """
    query_embedding = embedding_model.encode(query).tolist()
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
        include=['documents'],
    )
    return results["documents"][0]


def generate_response(message, history=None):
    """Answer *message* using retrieved FAQ context.

    *history* is accepted for Gradio ChatInterface compatibility but is not
    used; each turn is answered independently. (Was a mutable ``[]`` default —
    now ``None`` to avoid cross-call sharing.)
    """
    docs = search_chroma(message)
    context_str = "\n\n".join(docs)
    sys_prompt = """You are a helpful assistant. Based on the context and user query, provide an accurate and concise answer.

Rules:
- If context is relevant, answer based on it.
- If context is insufficient, ask for clarification or say it's not possible.
- Do not hallucinate or make up answers.
- Keep the tone friendly."""
    prompt = f"{sys_prompt}\n\ncontext:\n{context_str}\n\nQuestion: {message}\nAnswer:"
    response = llm(
        prompt,
        max_tokens=512,
        temperature=0.3,
        top_p=0.8,
        stop=["Q:", "\n"],
        echo=False,  # don't echo the prompt just to strip it off again
    )
    # split() is a safe no-op fallback: with echo=False the completion
    # normally contains no "Answer:" marker.
    return response["choices"][0]["text"].split("Answer:")[-1].strip()


# ---------------------- Gradio Interface ----------------------
def gradio_chat_interface(message, history):
    """Legacy manual chat handler.

    NOTE(review): unused — gr.ChatInterface below calls generate_response
    directly. Kept only for backward compatibility with external callers.
    """
    reply = generate_response(message)
    history = history + [(message, reply)]
    return history, history


demo = gr.ChatInterface(
    fn=generate_response,
    title="🦙 LLaMA-3 FAQ Chatbot",
    chatbot=gr.Chatbot(label="Ask me anything about Jupiter Money!"),
    examples=[
        "What is Jupiter Edge credit card?",
        "What happens if I miss a payment?",
        "How to change billing address?",
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch(share=True)