Spaces:

samuellimabraz
/

quantum-assistant

Sleeping

quantum-assistant / src /app /api /status /route.ts

github-actions[bot]

Deploy demo from GitHub Actions - 2025-12-24 02:23:20

6cdce85 25 days ago

4.35 kB

	import { NextResponse } from 'next/server';

	/**
	 * Shape this route expects when it parses the JSON body of RunPod's
	 * `/v2/{endpoint_id}/health` response.
	 *
	 * NOTE(review): the handler in this file defaults `workers.initializing` to 0
	 * when it is falsy, which suggests the field can be absent at runtime even
	 * though it is declared as required here — confirm against the RunPod API.
	 */
	export interface RunPodHealth {
	  /** Per-state job counters reported for the endpoint. */
	  jobs: { completed: number; failed: number; inProgress: number; inQueue: number; retried: number };
	  /** Per-state worker counters reported for the endpoint. */
	  workers: { idle: number; initializing: number; running: number; throttled: number };
	}

	/**
	 * JSON body returned by this route's `GET` handler.
	 */
	export interface StatusResponse {
	  /**
	   * Coarse readiness of the model backend:
	   * - `ready`: at least one idle worker, or the backend is not a RunPod endpoint.
	   * - `initializing`: no idle worker, but at least one worker is initializing.
	   * - `processing`: every live worker is busy running a job.
	   * - `cold_start`: no worker is up, or the health request itself failed.
	   * - `unavailable`: the health check did not yield a usable answer
	   *   (for example a non-OK HTTP response).
	   */
	  status: 'ready' | 'cold_start' | 'initializing' | 'processing' | 'unavailable';
	  /** Short human-readable description of `status`. */
	  message: string;
	  /** Worker counts; all zero when no health data is available. */
	  workers: {
	    idle: number;
	    running: number;
	    initializing: number;
	  };
	  /** Job counts; all zero when no health data is available. */
	  queue: {
	    inProgress: number;
	    inQueue: number;
	  };
	  /** Rough wait estimate in seconds; omitted when no wait is expected or known. */
	  estimatedWait?: number;
	}

	/** Seconds a scaled-to-zero worker typically needs to boot and load the model. */
	const COLD_START_WAIT_SECONDS = 45;
	/** Seconds an already-booting worker typically needs to finish loading the vLLM model. */
	const MODEL_LOAD_WAIT_SECONDS = 30;
	/** Rough per-job processing time used to estimate how long a queue takes to drain. */
	const SECONDS_PER_QUEUED_JOB = 15;
	/** Upper bound for the health request so the caller is never left hanging. */
	const HEALTH_TIMEOUT_MS = 5000;

	/** Worker/queue counters of a status response, already normalised to safe numbers. */
	type HealthSnapshot = Pick<StatusResponse, 'workers' | 'queue'>;
	/** The part of a status response that is derived from the counters. */
	type StatusSummary = Pick<StatusResponse, 'status' | 'message' | 'estimatedWait'>;

	/** Narrows an untrusted JSON value to a plain object. */
	function isRecord(value: unknown): value is Record<string, unknown> {
	  return typeof value === 'object' && value !== null;
	}

	/** Coerces an untrusted JSON value to a non-negative count; anything else becomes 0. */
	function toCount(value: unknown): number {
	  return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : 0;
	}

	/** Body used whenever the health endpoint answered but gave nothing usable. */
	function unavailableStatus(): StatusResponse {
	  return {
	    status: 'unavailable',
	    message: 'Unable to check model status',
	    workers: { idle: 0, running: 0, initializing: 0 },
	    queue: { inProgress: 0, inQueue: 0 },
	  };
	}

	/**
	 * Extracts the counters this route reports from a parsed health payload.
	 *
	 * @returns the normalised counters, or `undefined` when the payload has no
	 *   `workers` or `jobs` object. Individual counters that are missing or not
	 *   finite positive numbers are reported as 0.
	 */
	function readHealthSnapshot(payload: unknown): HealthSnapshot | undefined {
	  if (!isRecord(payload)) {
	    return undefined;
	  }
	  const workers = payload.workers;
	  const jobs = payload.jobs;
	  if (!isRecord(workers) || !isRecord(jobs)) {
	    return undefined;
	  }
	  return {
	    workers: {
	      idle: toCount(workers.idle),
	      running: toCount(workers.running),
	      initializing: toCount(workers.initializing),
	    },
	    queue: {
	      inProgress: toCount(jobs.inProgress),
	      inQueue: toCount(jobs.inQueue),
	    },
	  };
	}

	/** Maps normalised counters to the status, message and wait estimate shown to the user. */
	function summarizeHealth({ workers, queue }: HealthSnapshot): StatusSummary {
	  if (workers.idle > 0) {
	    return { status: 'ready', message: 'Model ready' };
	  }
	  if (workers.initializing > 0) {
	    return {
	      status: 'initializing',
	      message: 'Model loading...',
	      estimatedWait: MODEL_LOAD_WAIT_SECONDS,
	    };
	  }
	  if (workers.running > 0) {
	    if (queue.inQueue > 0) {
	      return {
	        status: 'processing',
	        message: `Processing (${queue.inQueue} in queue)`,
	        estimatedWait: queue.inQueue * SECONDS_PER_QUEUED_JOB,
	      };
	    }
	    return { status: 'processing', message: 'Processing request...' };
	  }

	  // Counters are non-negative, so reaching this point means no worker exists at all.
	  const hasPendingJobs = queue.inQueue > 0 || queue.inProgress > 0;
	  return {
	    status: 'cold_start',
	    message: hasPendingJobs
	      ? 'Starting worker...'
	      : 'Workers scaled to zero, will start on request',
	    estimatedWait: COLD_START_WAIT_SECONDS,
	  };
	}

	/**
	 * Check RunPod endpoint health to provide user feedback during cold starts.
	 *
	 * Reads `DEMO_MODEL_URL` and `DEMO_API_KEY` from the environment. When the URL
	 * is not a RunPod endpoint the backend is assumed to be always ready.
	 *
	 * @returns a JSON response describing the backend state. This handler does not
	 *   throw on upstream failures: a non-OK response or an unusable payload is
	 *   reported as `unavailable`, and a request that fails outright (network
	 *   error, timeout, body that is not JSON) is reported as `cold_start`.
	 */
	export async function GET(): Promise<NextResponse<StatusResponse>> {
	  // `||` rather than `??` on purpose: an env var set to '' should also fall back.
	  const baseUrl = process.env.DEMO_MODEL_URL || 'http://localhost:8000/v1';
	  const apiKey = process.env.DEMO_API_KEY || '';

	  // Extract the RunPod endpoint id from the vLLM base URL.
	  // vLLM URL format:   https://api.runpod.ai/v2/{endpoint_id}/openai/v1
	  // Health URL format: https://api.runpod.ai/v2/{endpoint_id}/health
	  const endpointId = /https:\/\/api\.runpod\.ai\/v2\/([^/]+)/.exec(baseUrl)?.[1];

	  if (endpointId === undefined) {
	    // Not a RunPod endpoint, assume it's always ready (local/other provider).
	    const ready: StatusResponse = {
	      status: 'ready',
	      message: 'Model server ready',
	      workers: { idle: 1, running: 0, initializing: 0 },
	      queue: { inProgress: 0, inQueue: 0 },
	    };
	    return NextResponse.json(ready);
	  }

	  try {
	    const response = await fetch(`https://api.runpod.ai/v2/${endpointId}/health`, {
	      method: 'GET',
	      headers: {
	        'Authorization': `Bearer ${apiKey}`,
	        'Content-Type': 'application/json',
	      },
	      // A health probe must reflect the current state, never a cached one.
	      cache: 'no-store',
	      // Short timeout for health check
	      signal: AbortSignal.timeout(HEALTH_TIMEOUT_MS),
	    });

	    if (!response.ok) {
	      return NextResponse.json(unavailableStatus());
	    }

	    // The payload comes from the network: validate it instead of trusting its shape.
	    const payload: unknown = await response.json();
	    const snapshot = readHealthSnapshot(payload);
	    if (snapshot === undefined) {
	      return NextResponse.json(unavailableStatus());
	    }

	    const body: StatusResponse = { ...summarizeHealth(snapshot), ...snapshot };
	    return NextResponse.json(body);
	  } catch (error: unknown) {
	    console.error('Health check error:', error);

	    // Network error might indicate cold start
	    const connecting: StatusResponse = {
	      status: 'cold_start',
	      message: 'Connecting to model server...',
	      workers: { idle: 0, running: 0, initializing: 0 },
	      queue: { inProgress: 0, inQueue: 0 },
	      estimatedWait: COLD_START_WAIT_SECONDS,
	    };
	    return NextResponse.json(connecting);
	  }
	}