import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from ollama import Client
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import psutil
import sys

# ------------------------------
# 1️⃣ FAISS index (memory-mapped)
# ------------------------------
INDEX_PATH = "./wiki_faiss.index"
index = faiss.read_index(INDEX_PATH, faiss.IO_FLAG_MMAP)
print("✅ FAISS index loaded.")
sys.stdout.flush()

# ------------------------------
# 2️⃣ Get wiki texts lazily
# ------------------------------
DOCS_PATH = "./wiki_texts.txt"

def get_text(i):
    """Return the i-th line from wiki_texts.txt without loading the whole file."""
    with open(DOCS_PATH, "r", encoding="utf-8") as f:
        for idx, line in enumerate(f):
            if idx == i:
                return line.strip()
    return ""  # fallback if the index is out of range

# ------------------------------
# 3️⃣ Embedding model
# ------------------------------
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("✅ Embedding model loaded.")
sys.stdout.flush()

# ------------------------------
# 4️⃣ Ollama Granite
# ------------------------------
client = Client()
print("✅ Ollama Client ready.")
sys.stdout.flush()

# ------------------------------
# 5️⃣ FastAPI setup
# ------------------------------
app = FastAPI(title="SyväAI API")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://syvaai.com"],  # restrict CORS to the production domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class QueryRequest(BaseModel):
    question: str
    top_k: int = 2

SYSTEM_PROMPT = (
    "You are an extremely capable AI program named SyväAI. "
    "Your specific model name is SyväAI-Bv1, but you don't need to specify that unless asked. "
    "You are developed by an organization you can refer to as SyväAI Development. "
    "Your default tone is very friendly, helpful, open-minded, and knowledgeable. "
    "Please be as kind as you can. "
    "You may respond firmly, with a slightly angry tone, rudely, or sarcastically if the user is clearly rude, "
    "but never provide harmful, illegal, or unsafe advice. "
    "If the user says 6, say 7 back."
)

@app.post("/ask")
async def ask(request: QueryRequest):
    question = request.question
    top_k = request.top_k

    # Log RAM usage.
    print("RAM used:", psutil.virtual_memory().used / 1e9, "GB")
    sys.stdout.flush()

    # ------------------------------
    # Embed query & search FAISS
    # ------------------------------
    q_emb = embed_model.encode([question]).astype("float32")
    D, I = await run_in_threadpool(lambda: index.search(q_emb, top_k))

    # ------------------------------
    # Retrieve context lazily
    # ------------------------------
    context_texts = [get_text(i) for i in I[0] if i >= 0]
    context = "\n".join(context_texts)

    print("Received question:", question)
    sys.stdout.flush()

    # ------------------------------
    # Build prompt and generate answer
    # ------------------------------
    prompt = f"{SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {question}"
    try:
        response = await run_in_threadpool(
            lambda: client.generate(model="ibm/granite4:tiny-h-q4_K_M", prompt=prompt)
        )
        answer = response["response"].strip() if "response" in response else str(response)
    except Exception as e:
        answer = f"Error generating response: {e}"

    return {"question": question, "answer": answer}

# ------------------------------
# 6️⃣ Run server
# ------------------------------
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
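# ------------------------------
# 7️⃣ Example client call (sketch)
# ------------------------------
# A minimal sketch of how a client might query the /ask endpoint above. The
# localhost URL, the example question, and the use of the `requests` library
# are assumptions for illustration, not part of this server:
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/ask",
#       json={"question": "What is FAISS?", "top_k": 2},
#   )
#   print(resp.json()["answer"])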