"""HTTP service exposing sentence embeddings and a small semantic router.

Endpoints:
    POST /v1/embeddings  -- embed one text or a list of texts.
    POST /v1/router      -- pick the candidate most similar to a query.

The embedding model is loaded once at import time and shared by all requests.
"""

import asyncio

import numpy as np
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

app = FastAPI()

# Loaded at import time so the cost is paid once, before any request arrives.
print("Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("Embedding model loaded!")


# -------- REQUEST MODELS --------

class EmbeddingRequest(BaseModel):
    """Request body for ``POST /v1/embeddings``."""

    # Either a single text or a batch of texts to embed.
    input: str | list[str]


class RouterRequest(BaseModel):
    """Request body for ``POST /v1/router``."""

    # Text to route.
    query: str
    # Texts the query is compared against; must contain at least one entry.
    candidates: list[str]


# -------- EMBEDDINGS API --------

@app.post("/v1/embeddings")
async def create_embeddings(req: EmbeddingRequest):
    """Embed the text(s) in ``req.input``.

    A single string is treated as a batch of one.

    Returns:
        A dict with ``object`` ("list"), ``data`` (one
        ``{"embedding": [...], "index": i}`` entry per input text, in input
        order) and ``model``.
    """
    texts = [req.input] if isinstance(req.input, str) else req.input

    if texts:
        # model.encode is blocking, CPU-bound work; run it in a worker thread
        # so the event loop keeps serving other requests meanwhile.
        vectors = await asyncio.to_thread(
            model.encode,
            texts,
            normalize_embeddings=True,
        )
        embeddings = vectors.tolist()
    else:
        # Nothing to embed: skip the model call and return an empty list.
        embeddings = []

    data = [
        {"embedding": emb, "index": i}
        for i, emb in enumerate(embeddings)
    ]

    return {
        "object": "list",
        "data": data,
        "model": "auric-embedding",
    }


# -------- SEMANTIC ROUTER API --------

@app.post("/v1/router")
async def semantic_router(req: RouterRequest):
    """Return the candidate whose embedding scores highest against the query.

    Returns:
        A dict with ``query``, ``best_match`` (the winning candidate text)
        and ``score`` (its dot-product score as a float).

    Raises:
        HTTPException: 422 when ``req.candidates`` is empty, since there is
            nothing to choose from.
    """
    if not req.candidates:
        # np.argmax raises ValueError on an empty array, which would surface
        # as an opaque 500; reject the request explicitly instead.
        raise HTTPException(
            status_code=422,
            detail="candidates must contain at least one entry",
        )

    # Blocking model calls are pushed to a worker thread (see create_embeddings).
    query_embedding = await asyncio.to_thread(
        model.encode,
        req.query,
        normalize_embeddings=True,
    )
    candidate_embeddings = await asyncio.to_thread(
        model.encode,
        req.candidates,
        normalize_embeddings=True,
    )

    # One score per candidate. Embeddings are requested normalized, so the dot
    # product acts as a cosine similarity (per the sentence-transformers docs).
    scores = np.dot(candidate_embeddings, query_embedding)
    best_index = int(np.argmax(scores))

    return {
        "query": req.query,
        "best_match": req.candidates[best_index],
        "score": float(scores[best_index]),
    }


# -------- SERVER START --------

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)