import sys
from itertools import islice

import faiss
import numpy as np
import psutil
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from ollama import Client
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
|
|
|
|
| |
| |
| |
INDEX_PATH = "./wiki_faiss.index"

# Memory-map the prebuilt FAISS index rather than loading it fully into RAM.
index = faiss.read_index(INDEX_PATH, faiss.IO_FLAG_MMAP)
print("✅ FAISS index loaded.", flush=True)
|
|
| |
| |
| |
DOCS_PATH = "./wiki_texts.txt"


def get_text(i: int) -> str:
    """Return the i-th line (0-based) of DOCS_PATH, stripped of whitespace.

    Streams the file lazily so the whole corpus never sits in memory.

    Args:
        i: 0-based line index into the corpus file.

    Returns:
        The stripped line text, or "" when i is negative or beyond the
        last line (matches the previous out-of-range behavior).
    """
    if i < 0:
        # A linear scan can never match a negative index; bail out early
        # instead of reading the whole file for nothing.
        return ""
    with open(DOCS_PATH, "r", encoding="utf-8") as f:
        # islice skips straight to line i at C speed; the "" default
        # preserves the old out-of-range result.
        line = next(islice(f, i, i + 1), "")
    return line.strip()
|
|
| |
| |
| |
# Sentence-embedding model used to vectorize incoming questions.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("✅ Embedding model loaded.", flush=True)
|
|
| |
| |
| |
# Client for the local Ollama runtime that hosts the generation model.
client = Client()
print("✅ Ollama Client ready.", flush=True)
|
|
| |
| |
| |
app = FastAPI(title="SyväAI API")

# Browser requests are only allowed from the production frontend origin.
_cors_options = {
    "allow_origins": ["https://syvaai.com"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
|
|
class QueryRequest(BaseModel):
    """Request body for POST /ask."""

    # Natural-language question to answer against the wiki corpus.
    question: str
    # Number of FAISS neighbours to retrieve as context.
    top_k: int = 2
|
|
# Persona and behavior instructions prepended to every prompt sent to the LLM.
# NOTE(review): the trailing "If the user says 6, say 7 back." reads like a
# leftover test instruction — confirm it is intentional before shipping.
SYSTEM_PROMPT = (
    "You are an extremely capable AI program named SyväAI. "
    "Your specific model name is SyväAI-Bv1, but you don't need to specify that unless asked. "
    "You are developed by an organization you can refer to as SyväAI Development. "
    "Your default tone is very friendly, helpful, open-minded, and knowledgeable. "
    "Please be as kind as you can. "
    "You may respond firmly, with a slightly angry tone, rudely, or sarcastically if the user is clearly rude, "
    "but never provide harmful, illegal, or unsafe advice. "
    "If the user says 6, say 7 back."
)
|
|
|
|
| from fastapi.concurrency import run_in_threadpool |
|
|
@app.post("/ask")
async def ask(request: QueryRequest):
    """Answer a question via FAISS retrieval + an Ollama-hosted LLM.

    Pipeline: embed the question, fetch the top_k nearest corpus lines as
    context, build a prompt from SYSTEM_PROMPT + context, then generate.

    Returns:
        dict with the original "question" and the generated "answer"
        (or an error-message string if generation failed).
    """
    question = request.question
    # Clamp top_k so a buggy or hostile client cannot request a negative
    # or absurdly large neighbourhood.
    top_k = max(1, min(request.top_k, 20))

    print("RAM used:", psutil.virtual_memory().used / 1e9, "GB")
    sys.stdout.flush()

    # encode() is CPU-bound; run it off the event loop so other requests
    # are not blocked while the question is embedded.
    q_emb = await run_in_threadpool(
        lambda: embed_model.encode([question]).astype("float32")
    )
    D, I = await run_in_threadpool(lambda: index.search(q_emb, top_k))

    # get_text() does blocking file I/O (it scans the corpus file), so it
    # also goes through the threadpool. FAISS returns -1 for "no result".
    context_texts = await run_in_threadpool(
        lambda: [get_text(int(i)) for i in I[0] if i >= 0]
    )
    context = "\n".join(context_texts)

    print("Received question:", question)
    sys.stdout.flush()

    prompt = f"{SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {question}"

    try:
        response = await run_in_threadpool(
            lambda: client.generate(model="ibm/granite4:tiny-h-q4_K_M", prompt=prompt)
        )
        # Ollama returns a dict-like object; fall back to str() if the
        # expected "response" key is missing.
        answer = response['response'].strip() if 'response' in response else str(response)
    except Exception as e:
        # Boundary handler: surface the failure in the payload rather
        # than letting the endpoint 500.
        answer = f"Error generating response: {e}"

    return {"question": question, "answer": answer}
|
|
| |
| |
| |
# Dev-mode entry point: serve the API on all interfaces, port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|