# phi4_fullrouter / app.py
import os
import sys
import json
import yaml
import numpy as np
from typing import List, Dict, Optional, Any
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import onnxruntime as ort
from sentence_transformers import SentenceTransformer
import logging
# Configure logging
# Basic config: INFO level to stderr; format carries timestamp, logger name, and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger named after this module, per the logging convention.
logger = logging.getLogger(__name__)
# Define API models
class RouteRequest(BaseModel):
    """Request body for POST /route."""
    query: str                       # user query to be routed
    available_models: List[str]      # candidate model names; each must exist in model_embeddings
    performance_weight: float = 1.0  # weight on predicted quality in the preference vector
    cost_weight: float = 0.5         # weight on cost in the preference vector
    return_scores: bool = False      # when True, per-model predicted scores are returned too
class RouteResponse(BaseModel):
    """Response body for POST /route."""
    selected_model: str  # name of the model chosen by the routing policy
    scores: Optional[Dict[str, float]] = None  # per-model predicted scores (only when requested)
# Initialize FastAPI app
app = FastAPI(
    title="Multi-Router-Bandit API",
    description="API for routing queries to the most appropriate LLM",
    version="1.0.0"
)
# Global variables — populated once by startup_event(); None until startup completes.
embedding_model = None        # SentenceTransformer used to embed incoming queries
routing_policy_session = None # ONNX Runtime session for the routing policy network
irt_model_session = None      # ONNX Runtime session for the IRT score predictor
model_embeddings = None       # dict: model name -> embedding vector (model_embeddings.json)
models_config = None          # parsed models_config.yaml (providers, costs, descriptions)
def get_models_dir():
    """Return the models directory: MODELS_DIR env var if set, else "models"."""
    return os.getenv("MODELS_DIR", "models")
@app.on_event("startup")
async def startup_event():
    """Load all model artifacts into the module-level globals at startup.

    Exits the process (sys.exit(1)) if any required artifact is missing
    from the models directory.
    """
    global embedding_model, routing_policy_session, irt_model_session, model_embeddings, models_config

    models_dir = get_models_dir()

    # Verify every required artifact exists before loading anything.
    # Order matters: the first missing file determines the error logged.
    required_files = [
        ("routing_policy.onnx", "Routing policy"),
        ("irt_model.onnx", "IRT model"),
        ("model_embeddings.json", "Model embeddings"),
        ("models_config.yaml", "Models configuration"),
    ]
    for filename, label in required_files:
        artifact_path = os.path.join(models_dir, filename)
        if not os.path.exists(artifact_path):
            logger.error(f"{label} not found at {artifact_path}")
            sys.exit(1)

    # Sentence embedding model for query encoding.
    logger.info("Loading embedding model...")
    embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

    # ONNX inference sessions: routing policy and IRT score predictor.
    logger.info("Loading ONNX models...")
    routing_policy_session = ort.InferenceSession(os.path.join(models_dir, "routing_policy.onnx"))
    irt_model_session = ort.InferenceSession(os.path.join(models_dir, "irt_model.onnx"))

    # Per-model embedding vectors keyed by model name.
    logger.info("Loading model embeddings...")
    with open(os.path.join(models_dir, "model_embeddings.json"), "r") as f:
        model_embeddings = json.load(f)

    # Provider / cost / description configuration.
    logger.info("Loading models configuration...")
    with open(os.path.join(models_dir, "models_config.yaml"), "r") as f:
        models_config = yaml.safe_load(f)

    logger.info("Startup complete")
def get_model_cost(model_name: str) -> float:
    """Look up a model's per-1k-token cost in the global models_config.

    API-endpoint models use the mean of their input and output per-1k-token
    costs; open source models use their flat cost_per_1k_tokens; unknown
    models fall back to a default of 0.01.
    """
    # API-hosted models: average the input and output token costs.
    for provider_cfg in models_config.get('api_endpoints', {}).values():
        for cfg in provider_cfg.get('models', []):
            if cfg['name'] != model_name:
                continue
            input_cost = cfg.get('cost_per_1k_input_tokens', 0)
            output_cost = cfg.get('cost_per_1k_output_tokens', 0)
            return (input_cost + output_cost) / 2

    # Open source models: a single flat per-1k-token cost.
    for cfg in models_config.get('open_source_models', []):
        if cfg['name'] == model_name:
            return cfg.get('cost_per_1k_tokens', 0)

    # Model absent from the configuration entirely.
    return 0.01
def get_prompt_embedding(query: str) -> np.ndarray:
    """Embed a single query string with the global SentenceTransformer model."""
    # encode() takes a batch; wrap the query and unwrap the single result.
    embeddings = embedding_model.encode([query], convert_to_numpy=True)
    return embeddings[0]
def predict_score(prompt_embedding: np.ndarray, model_embedding: np.ndarray) -> float:
    """Predict the score for one (prompt, model) embedding pair via the IRT ONNX model."""
    # Both inputs are fed as float32 row vectors of batch size 1.
    inputs = {
        "prompt_embedding": prompt_embedding.astype(np.float32).reshape(1, -1),
        "model_embedding": model_embedding.astype(np.float32).reshape(1, -1),
    }
    outputs = irt_model_session.run(None, inputs)
    # First output, first batch row, first element: the scalar score.
    return float(outputs[0][0][0])
def route_query(
    query: str,
    available_models: List[str],
    performance_weight: float = 1.0,
    cost_weight: float = 0.5,
    return_scores: bool = False
) -> Dict[str, Any]:
    """Route a query to the most appropriate LLM.

    Args:
        query: The user query to route.
        available_models: Candidate model names; each must be present in the
            global model_embeddings mapping.
        performance_weight: Weight on predicted quality in the preference vector.
        cost_weight: Weight on cost in the preference vector.
        return_scores: When True, include per-model predicted scores in the result.

    Returns:
        Dict with "selected_model" and, when return_scores is True, "scores"
        mapping each candidate name to its IRT-predicted score.

    Raises:
        HTTPException: 400 if available_models is empty or contains an
            unknown model name.
    """
    # Fail fast with a client error instead of an obscure numpy
    # concatenate/argmax failure (which would surface as a 500).
    if not available_models:
        raise HTTPException(status_code=400, detail="available_models must not be empty")

    # Get prompt embedding
    prompt_embedding = get_prompt_embedding(query)

    # (performance, cost) preference vector fed to the routing policy.
    preference = np.array([[performance_weight, cost_weight]], dtype=np.float32)

    # Per-candidate context: (embedding, cost, IRT-predicted score).
    model_contexts = []
    for model_name in available_models:
        if model_name not in model_embeddings:
            raise HTTPException(status_code=400, detail=f"Model {model_name} not found")
        model_embedding = np.array(model_embeddings[model_name], dtype=np.float32)
        model_cost = get_model_cost(model_name)
        predicted_score = predict_score(prompt_embedding, model_embedding)
        model_contexts.append((model_embedding, model_cost, predicted_score))

    # Stack contexts into one (n_models, emb_dim + 2) matrix: [embedding | cost | score].
    model_embeddings_list = np.array([ctx[0] for ctx in model_contexts], dtype=np.float32)
    model_costs = np.array([ctx[1] for ctx in model_contexts], dtype=np.float32)
    model_scores = np.array([ctx[2] for ctx in model_contexts], dtype=np.float32)
    model_contexts_concat = np.concatenate([
        model_embeddings_list,
        model_costs.reshape(-1, 1),
        model_scores.reshape(-1, 1)
    ], axis=1)

    # Prepare inputs for the routing policy ONNX model.
    ort_inputs = {
        "prompt_embedding": prompt_embedding.astype(np.float32).reshape(1, -1),
        "model_contexts": model_contexts_concat.astype(np.float32),
        "preference": preference
    }
    ort_outputs = routing_policy_session.run(None, ort_inputs)

    # Greedy action: index of the candidate with the highest logit.
    action_logits = ort_outputs[0]
    action = np.argmax(action_logits, axis=1)[0]
    selected_model = available_models[action]

    response = {"selected_model": selected_model}
    if return_scores:
        response["scores"] = {
            name: float(model_contexts[i][2])
            for i, name in enumerate(available_models)
        }
    return response
@app.post("/route", response_model=RouteResponse)
async def route_endpoint(request: RouteRequest):
    """Route a query to the most appropriate LLM.

    Delegates to route_query(); deliberate client errors (HTTPException,
    e.g. 400 for an unknown model) pass through unchanged, while any other
    failure is logged and returned as a 500.
    """
    try:
        result = route_query(
            query=request.query,
            available_models=request.available_models,
            performance_weight=request.performance_weight,
            cost_weight=request.cost_weight,
            return_scores=request.return_scores
        )
        return result
    except HTTPException:
        # Bug fix: the bare `except Exception` below used to swallow the
        # 400 raised by route_query for unknown models and re-wrap it as
        # a 500. Re-raise HTTP errors untouched.
        raise
    except Exception as e:
        logger.error(f"Error routing query: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/models")
async def list_models():
    """List every model known to models_config, with provider and cost info."""
    models = []

    # API-hosted models: one entry per model, tagged with its provider name.
    api_endpoints = models_config.get('api_endpoints', {})
    for provider, provider_config in api_endpoints.items():
        for cfg in provider_config.get('models', []):
            entry = {
                "name": cfg['name'],
                "provider": provider,
                "cost_per_1k_input_tokens": cfg.get('cost_per_1k_input_tokens', 0),
                "cost_per_1k_output_tokens": cfg.get('cost_per_1k_output_tokens', 0),
                "description": cfg.get('description', ""),
            }
            models.append(entry)

    # Open source models: flat per-1k-token cost, fixed provider label.
    for cfg in models_config.get('open_source_models', []):
        models.append({
            "name": cfg['name'],
            "provider": "open_source",
            "cost_per_1k_tokens": cfg.get('cost_per_1k_tokens', 0),
            "description": cfg.get('description', ""),
        })

    return {"models": models}
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Liveness probe only: returns a static payload and does not verify
    that the models were actually loaded.
    """
    return {"status": "ok"}
# Add a simple UI for testing
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve an inline single-page HTML UI for manually exercising POST /route."""
    return """
<!DOCTYPE html>
<html>
<head>
<title>Multi-Router-Bandit API</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
h1 {
color: #333;
}
.form-group {
margin-bottom: 15px;
}
label {
display: block;
margin-bottom: 5px;
}
input[type="text"], textarea, select {
width: 100%;
padding: 8px;
box-sizing: border-box;
}
button {
background-color: #4CAF50;
color: white;
padding: 10px 15px;
border: none;
cursor: pointer;
}
button:hover {
background-color: #45a049;
}
#result {
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd;
background-color: #f9f9f9;
white-space: pre-wrap;
}
</style>
</head>
<body>
<h1>Multi-Router-Bandit API</h1>
<div class="form-group">
<label for="query">Query:</label>
<textarea id="query" rows="4" placeholder="Enter your query here"></textarea>
</div>
<div class="form-group">
<label for="models">Available Models (comma-separated):</label>
<input type="text" id="models" placeholder="gpt-4,llama-3-70b,mistral-7b-instruct,phi-3-mini">
</div>
<div class="form-group">
<label for="performance-weight">Performance Weight:</label>
<input type="number" id="performance-weight" value="1.0" step="0.1" min="0">
</div>
<div class="form-group">
<label for="cost-weight">Cost Weight:</label>
<input type="number" id="cost-weight" value="0.5" step="0.1" min="0">
</div>
<div class="form-group">
<label for="return-scores">Return Scores:</label>
<select id="return-scores">
<option value="true">Yes</option>
<option value="false">No</option>
</select>
</div>
<button onclick="routeQuery()">Route Query</button>
<div id="result"></div>
<script>
async function routeQuery() {
const query = document.getElementById('query').value;
const modelsStr = document.getElementById('models').value;
const performanceWeight = parseFloat(document.getElementById('performance-weight').value);
const costWeight = parseFloat(document.getElementById('cost-weight').value);
const returnScores = document.getElementById('return-scores').value === 'true';
const models = modelsStr.split(',').map(m => m.trim()).filter(m => m);
if (!query) {
alert('Please enter a query');
return;
}
if (models.length === 0) {
alert('Please enter at least one model');
return;
}
const resultDiv = document.getElementById('result');
resultDiv.textContent = 'Loading...';
try {
const response = await fetch('/route', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
query,
available_models: models,
performance_weight: performanceWeight,
cost_weight: costWeight,
return_scores: returnScores
})
});
const data = await response.json();
if (response.ok) {
resultDiv.textContent = JSON.stringify(data, null, 2);
} else {
resultDiv.textContent = `Error: ${data.detail}`;
}
} catch (error) {
resultDiv.textContent = `Error: ${error.message}`;
}
}
</script>
</body>
</html>
"""