# SyvaAI-Bv1 / run.py
# Uploaded to the Hugging Face Hub by danielgrims (folder upload via huggingface_hub).
# Standard library
import sys
from itertools import islice

# Third-party
import faiss
import numpy as np
import psutil
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from ollama import Client
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
# ------------------------------
# 1️⃣ FAISS index (memory-mapped)
# ------------------------------
INDEX_PATH = "./wiki_faiss.index"
# IO_FLAG_MMAP maps the index file instead of copying it into RAM,
# so startup stays cheap even for a large index.
index = faiss.read_index(INDEX_PATH, faiss.IO_FLAG_MMAP)
print("✅ FAISS index loaded.", flush=True)
# ------------------------------
# 2️⃣ Get wiki texts lazily
# ------------------------------
DOCS_PATH = "./wiki_texts.txt"


def get_text(i: int) -> str:
    """Return the i-th line (0-based) of DOCS_PATH, stripped of surrounding whitespace.

    Streams the file rather than loading it into memory, and jumps straight
    to line i with itertools.islice instead of enumerating every line.
    Returns "" when i is negative (FAISS pads missing neighbours with -1)
    or past the end of the file.
    """
    if i < 0:
        return ""  # islice rejects negative indices; original also fell through to ""
    with open(DOCS_PATH, "r", encoding="utf-8") as f:
        line = next(islice(f, i, i + 1), None)
    return line.strip() if line is not None else ""
# ------------------------------
# 3️⃣ Embedding model
# ------------------------------
# NOTE(review): assumes the FAISS index was built with this same model
# (all-MiniLM-L6-v2, 384-dim vectors) — confirm against the index builder.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("✅ Embedding model loaded.", flush=True)
# ------------------------------
# 4️⃣ Ollama Granite
# ------------------------------
# No host argument: the client talks to the local Ollama daemon at its default address.
client = Client()
print("✅ Ollama Client ready.", flush=True)
# ------------------------------
# 5️⃣ FastAPI setup
# ------------------------------
app = FastAPI(title="SyväAI API")
# Allow the production frontend to call this API from the browser.
app.add_middleware(
CORSMiddleware,
allow_origins=["https://syvaai.com"],  # locked to the production domain only
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
class QueryRequest(BaseModel):
# Request payload for POST /ask.
# question: the user's free-text question.
# top_k:    how many FAISS neighbours to retrieve as context (default 2).
question: str
top_k: int = 2
# Persona / guardrail preamble prepended to every prompt sent to the LLM.
# NOTE(review): the final "If the user says 6, say 7 back." line looks like a
# leftover easter egg or test instruction — confirm it is intentional.
SYSTEM_PROMPT = (
"You are an extremely capable AI program named SyväAI. "
"Your specific model name is SyväAI-Bv1, but you don't need to specify that unless asked. "
"You are developed by an organization you can refer to as SyväAI Development. "
"Your default tone is very friendly, helpful, open-minded, and knowledgeable. "
"Please be as kind as you can. "
"You may respond firmly, with a slightly angry tone, rudely, or sarcastically if the user is clearly rude, "
"but never provide harmful, illegal, or unsafe advice. "
"If the user says 6, say 7 back."
)
from fastapi.concurrency import run_in_threadpool


@app.post("/ask")
async def ask(request: QueryRequest):
    """RAG endpoint: retrieve top_k wiki passages for the question, then answer with Granite.

    Returns {"question": ..., "answer": ...}. Generation failures are reported
    inside the "answer" field instead of surfacing as an HTTP 500.
    """
    question = request.question
    top_k = request.top_k

    # Log RAM usage so memory-pressure issues are easy to spot in the logs.
    print("RAM used:", psutil.virtual_memory().used / 1e9, "GB", flush=True)

    # ------------------------------
    # Embed query & search FAISS
    # ------------------------------
    # Both the sentence embedding and the FAISS search are CPU-bound; run them
    # off the event loop so concurrent requests are not blocked. (The original
    # ran encode() inline, stalling the loop for its full duration.)
    q_emb = (await run_in_threadpool(embed_model.encode, [question])).astype("float32")
    D, I = await run_in_threadpool(index.search, q_emb, top_k)

    # ------------------------------
    # Retrieve context lazily
    # ------------------------------
    # FAISS pads missing neighbours with -1; skip those ids.
    context = "\n".join(get_text(i) for i in I[0] if i >= 0)
    print("Received question:", question, flush=True)

    # ------------------------------
    # Build prompt and generate answer
    # ------------------------------
    prompt = f"{SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {question}"
    try:
        response = await run_in_threadpool(
            lambda: client.generate(model="ibm/granite4:tiny-h-q4_K_M", prompt=prompt)
        )
        # Ollama responses are dict-like; fall back to str() for unexpected shapes.
        answer = response['response'].strip() if 'response' in response else str(response)
    except Exception as e:
        # Best-effort: surface the failure text to the caller rather than crash.
        answer = f"Error generating response: {e}"
    return {"question": question, "answer": answer}
# ------------------------------
# 6️⃣ Run server
# ------------------------------
# Entry point for `python run.py`; binds on all interfaces, port 8000.
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)