English
File size: 3,805 Bytes
1026698
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from ollama import Client
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import psutil
import sys
from fastapi.middleware.cors import CORSMiddleware


# ------------------------------
# 1️⃣ FAISS index (memory-mapped)
# ------------------------------
# Prebuilt FAISS index over the wiki corpus; row i corresponds to line i
# of wiki_texts.txt (see get_text below).
INDEX_PATH = "./wiki_faiss.index"
# IO_FLAG_MMAP memory-maps the index file instead of loading it into RAM,
# keeping startup memory low for large indexes.
index = faiss.read_index(INDEX_PATH, faiss.IO_FLAG_MMAP)
print("✅ FAISS index loaded.")
sys.stdout.flush()  # make startup progress visible even with buffered stdout

# ------------------------------
# 2️⃣ Get wiki texts lazily
# ------------------------------
# One passage per line; line i is the text behind FAISS index row i.
DOCS_PATH = "./wiki_texts.txt"

def get_text(i):
    """Return the i-th line (0-based) of DOCS_PATH, stripped of whitespace.

    Streams the file lazily so the full corpus is never held in memory.
    Returns "" when *i* is negative or beyond the end of the file.
    """
    from itertools import islice  # local import: keeps the module import block untouched

    if i < 0:
        # islice rejects negative indices; preserve the original "" fallback
        # (FAISS uses -1 for missing neighbours, filtered by callers too).
        return ""
    with open(DOCS_PATH, "r", encoding="utf-8") as f:
        # Skip straight to line i at C speed instead of a Python-level
        # enumerate loop over every preceding line.
        line = next(islice(f, i, i + 1), None)
    return line.strip() if line is not None else ""

# ------------------------------
# 3️⃣ Embedding model
# ------------------------------
# NOTE(review): this model is presumably the same one used to build
# wiki_faiss.index — the embeddings must match or searches are meaningless;
# confirm against the index-building script.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("✅ Embedding model loaded.")
sys.stdout.flush()

# ------------------------------
# 4️⃣ Ollama Granite
# ------------------------------
# Client() with no arguments talks to the local Ollama daemon on its
# default endpoint; the model name is passed per-request in ask().
client = Client()
print("✅ Ollama Client ready.")
sys.stdout.flush()

# ------------------------------
# 5️⃣ FastAPI setup
# ------------------------------
app = FastAPI(title="SyväAI API")
# CORS: only the production frontend origin may call this API from a browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://syvaai.com"],  # production frontend origin
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request body schema for POST /ask.
class QueryRequest(BaseModel):
    question: str  # user's natural-language question
    top_k: int = 2  # number of nearest passages to retrieve from FAISS

# Persona/instruction preamble prepended to every prompt sent to the LLM.
# NOTE(review): the final "6 → 7" line looks like a leftover smoke-test
# instruction — confirm whether it is intentional before shipping.
SYSTEM_PROMPT = (
    "You are an extremely capable AI program named SyväAI. "
    "Your specific model name is SyväAI-Bv1, but you don't need to specify that unless asked. "
    "You are developed by an organization you can refer to as SyväAI Development. "
    "Your default tone is very friendly, helpful, open-minded, and knowledgeable. "
    "Please be as kind as you can. "
    "You may respond firmly, with a slightly angry tone, rudely, or sarcastically if the user is clearly rude, "
    "but never provide harmful, illegal, or unsafe advice. "
    "If the user says 6, say 7 back."
)


from fastapi.concurrency import run_in_threadpool

@app.post("/ask")
async def ask(request: QueryRequest):
    """Answer a question via retrieval-augmented generation.

    Embeds the question, retrieves the top_k nearest wiki passages from the
    FAISS index, and asks the Ollama Granite model to answer using that
    context. Returns {"question": ..., "answer": ...}; generation failures
    are reported inside the answer string rather than as an HTTP error.
    """
    question = request.question
    # Clamp to at least 1: FAISS rejects non-positive k, and a client
    # sending top_k=0 should not crash the request.
    top_k = max(1, request.top_k)

    # log RAM so memory growth is visible in server logs
    print("RAM used:", psutil.virtual_memory().used / 1e9, "GB")
    sys.stdout.flush()

    # ------------------------------
    # Embed query & search FAISS
    # ------------------------------
    # Encoding is CPU-bound and was previously run directly on the event
    # loop, blocking every concurrent request; run it in the threadpool
    # like the FAISS search already is.
    q_emb_raw = await run_in_threadpool(lambda: embed_model.encode([question]))
    q_emb = q_emb_raw.astype("float32")
    D, I = await run_in_threadpool(lambda: index.search(q_emb, top_k))

    # ------------------------------
    # Retrieve context lazily
    # ------------------------------
    # get_text streams wiki_texts.txt from disk (blocking I/O) — also off
    # the event loop. FAISS pads missing neighbours with -1; skip those.
    context_texts = await run_in_threadpool(
        lambda: [get_text(i) for i in I[0] if i >= 0]
    )
    context = "\n".join(context_texts)

    print("Received question:", question)
    sys.stdout.flush()

    # ------------------------------
    # Build prompt and generate answer
    # ------------------------------
    prompt = f"{SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {question}"

    try:
        response = await run_in_threadpool(
            lambda: client.generate(model="ibm/granite4:tiny-h-q4_K_M", prompt=prompt)
        )
        # Older ollama clients return a dict with a 'response' key; fall
        # back to str() for other return shapes.
        answer = response['response'].strip() if 'response' in response else str(response)
    except Exception as e:
        # Best-effort endpoint: surface the failure in the answer body
        # instead of a 500 so the frontend always gets a reply.
        answer = f"Error generating response: {e}"

    return {"question": question, "answer": answer}

# ------------------------------
# 6️⃣ Run server
# ------------------------------
if __name__ == "__main__":
    # Bind on all interfaces, port 8000. No TLS here — presumably served
    # behind a reverse proxy (the CORS origin is https); confirm deployment.
    uvicorn.run(app, host="0.0.0.0", port=8000)