import sys
from itertools import islice

import faiss
import numpy as np
import psutil
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from ollama import Client
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
|
|
|
|
| |
| |
| |
INDEX_PATH = "./wiki_faiss.index"

# Memory-map the prebuilt FAISS index rather than loading it fully into RAM.
index = faiss.read_index(INDEX_PATH, faiss.IO_FLAG_MMAP)
print("✅ FAISS index loaded.", flush=True)
|
|
| |
| |
| |
DOCS_PATH = "./wiki_texts.txt"


def get_text(i: int) -> str:
    """Return the i-th line (0-based) of DOCS_PATH, stripped of whitespace.

    Streams the file lazily so the whole corpus never sits in memory.

    Args:
        i: 0-based line index into the corpus file.

    Returns:
        The stripped line text, or "" when i is negative or beyond the
        last line (matches the previous out-of-range behavior).
    """
    if i < 0:
        # A linear scan can never match a negative index; bail out early
        # instead of reading the whole file for nothing.
        return ""
    with open(DOCS_PATH, "r", encoding="utf-8") as f:
        # islice skips straight to line i at C speed; the "" default
        # preserves the old out-of-range result.
        line = next(islice(f, i, i + 1), "")
    return line.strip()
|
|
| |
| |
| |
# Sentence-embedding model used to vectorize incoming questions.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("✅ Embedding model loaded.", flush=True)
|
|
| |
| |
| |
# Client for the local Ollama runtime that hosts the generation model.
client = Client()
print("✅ Ollama Client ready.", flush=True)
|
|
| |
| |
| |
app = FastAPI(title="SyväAI API")

# Browser requests are only allowed from the production frontend origin.
_cors_options = {
    "allow_origins": ["https://syvaai.com"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
|
|
class QueryRequest(BaseModel):
    """Request body for POST /ask."""

    # Natural-language question to answer against the wiki corpus.
    question: str
    # Number of FAISS neighbours to retrieve as context.
    top_k: int = 2
|
|
# Persona and behavior instructions prepended to every prompt sent to the LLM.
# NOTE(review): the trailing "If the user says 6, say 7 back." reads like a
# leftover test instruction — confirm it is intentional before shipping.
SYSTEM_PROMPT = (
    "You are an extremely capable AI program named SyväAI. "
    "Your specific model name is SyväAI-Bv1, but you don't need to specify that unless asked. "
    "You are developed by an organization you can refer to as SyväAI Development. "
    "Your default tone is very friendly, helpful, open-minded, and knowledgeable. "
    "Please be as kind as you can. "
    "You may respond firmly, with a slightly angry tone, rudely, or sarcastically if the user is clearly rude, "
    "but never provide harmful, illegal, or unsafe advice. "
    "If the user says 6, say 7 back."
)
|
|
|
|
| from fastapi.concurrency import run_in_threadpool |
|
|
@app.post("/ask")
async def ask(request: QueryRequest):
    """Answer a question via FAISS retrieval + an Ollama-hosted LLM.

    Pipeline: embed the question, fetch the top_k nearest corpus lines as
    context, build a prompt from SYSTEM_PROMPT + context, then generate.

    Returns:
        dict with the original "question" and the generated "answer"
        (or an error-message string if generation failed).
    """
    question = request.question
    # Clamp top_k so a buggy or hostile client cannot request a negative
    # or absurdly large neighbourhood.
    top_k = max(1, min(request.top_k, 20))

    print("RAM used:", psutil.virtual_memory().used / 1e9, "GB")
    sys.stdout.flush()

    # encode() is CPU-bound; run it off the event loop so other requests
    # are not blocked while the question is embedded.
    q_emb = await run_in_threadpool(
        lambda: embed_model.encode([question]).astype("float32")
    )
    D, I = await run_in_threadpool(lambda: index.search(q_emb, top_k))

    # get_text() does blocking file I/O (it scans the corpus file), so it
    # also goes through the threadpool. FAISS returns -1 for "no result".
    context_texts = await run_in_threadpool(
        lambda: [get_text(int(i)) for i in I[0] if i >= 0]
    )
    context = "\n".join(context_texts)

    print("Received question:", question)
    sys.stdout.flush()

    prompt = f"{SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {question}"

    try:
        response = await run_in_threadpool(
            lambda: client.generate(model="ibm/granite4:tiny-h-q4_K_M", prompt=prompt)
        )
        # Ollama returns a dict-like object; fall back to str() if the
        # expected "response" key is missing.
        answer = response['response'].strip() if 'response' in response else str(response)
    except Exception as e:
        # Boundary handler: surface the failure in the payload rather
        # than letting the endpoint 500.
        answer = f"Error generating response: {e}"

    return {"question": question, "answer": answer}
|
|
| |
| |
| |
# Dev-mode entry point: serve the API on all interfaces, port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|