<!--
Matrix Agent
v3.2: Speed optimizations - OpenBLAS, dual models (7B/1.5B), model selector, timing display
1b50d66
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Anthropic-Compatible API Dashboard</title>
<meta name="description" content="Dashboard for a self-hosted Anthropic-compatible API powered by Qwen2.5-Coder: live status, model list, interactive playground, logs, and endpoint reference.">
<!-- Warm up connections to the two CDNs used below -->
<link rel="preconnect" href="https://cdn.tailwindcss.com">
<link rel="preconnect" href="https://cdnjs.cloudflare.com">
<!-- Loaded synchronously on purpose: the inline script at the bottom of the
     page calls hljs.highlightAll() immediately, so highlight.js (and the
     Tailwind Play runtime that styles the page) must be parsed first. -->
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<style>
.gradient-bg { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); }
.card { background: rgba(255,255,255,0.95); backdrop-filter: blur(10px); }
pre code { font-size: 0.85rem !important; }
.status-dot { animation: pulse 2s infinite; }
@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.5; } }
</style>
</head>
<body class="min-h-screen gradient-bg">
<div class="container mx-auto px-4 py-8">
<!-- Header -->
<div class="text-center mb-8">
<h1 class="text-4xl font-bold text-white mb-2">Anthropic-Compatible API</h1>
<p class="text-purple-100 text-lg">Self-hosted Claude-compatible endpoint powered by Qwen2.5-Coder-7B</p>
</div>
<!-- Status Cards: content filled in by fetchStatus(), polled every 30s.
     Headings are h2 (page h1 is above); Tailwind preflight resets heading
     styles, so the level change is invisible but fixes the outline. -->
<div class="grid md:grid-cols-3 gap-6 mb-8">
<div class="card rounded-xl p-6 shadow-lg">
<div class="flex items-center justify-between mb-4">
<h2 class="text-lg font-semibold text-gray-800">API Status</h2>
<!-- Colored dot is purely decorative; the text below carries the state -->
<span id="status-indicator" class="status-dot w-3 h-3 bg-yellow-400 rounded-full" aria-hidden="true"></span>
</div>
<div id="api-status" class="text-sm text-gray-600">Checking...</div>
</div>
<div class="card rounded-xl p-6 shadow-lg">
<div class="flex items-center justify-between mb-4">
<h2 class="text-lg font-semibold text-gray-800">Queue Status</h2>
<svg class="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 6h16M4 12h16M4 18h16"/>
</svg>
</div>
<div id="queue-status" class="text-sm text-gray-600">Loading...</div>
</div>
<div class="card rounded-xl p-6 shadow-lg">
<div class="flex items-center justify-between mb-4">
<h2 class="text-lg font-semibold text-gray-800">Cache Stats</h2>
<svg class="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z"/>
</svg>
</div>
<div id="cache-status" class="text-sm text-gray-600">Loading...</div>
</div>
</div>
<!-- Models Section: list populated by fetchStatus() from /api/status -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
<h2 class="text-xl font-bold text-gray-800 mb-4">Available Models</h2>
<p class="text-sm text-gray-500 mb-4">Choose based on your needs: 7B for quality, 1.5B for speed (3x faster)</p>
<div id="models-list" class="space-y-3">Loading models...</div>
</div>
<!-- Quick Start Guide: static, pre-highlighted code samples -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
<h2 class="text-xl font-bold text-gray-800 mb-4">Quick Start</h2>
<div class="space-y-6">
<!-- Claude Code -->
<div>
<h3 class="font-semibold text-purple-700 mb-2 flex items-center gap-2">
<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg>
Claude Code CLI
</h3>
<pre><code class="language-bash"># Set environment variables
export ANTHROPIC_API_KEY="any-key"
export ANTHROPIC_BASE_URL="https://likhonsheikh-anthropic-compatible-api.hf.space/anthropic"
# Run Claude Code with custom model
claude --model qwen2.5-coder-7b "Write a hello world in Python"</code></pre>
</div>
<!-- Python SDK -->
<div>
<h3 class="font-semibold text-purple-700 mb-2 flex items-center gap-2">
<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2z"/></svg>
Python SDK
</h3>
<pre><code class="language-python">import anthropic
client = anthropic.Anthropic(
api_key="any-key",
base_url="https://likhonsheikh-anthropic-compatible-api.hf.space/anthropic"
)
message = client.messages.create(
model="qwen2.5-coder-7b",
max_tokens=1024,
messages=[{"role": "user", "content": "Hello!"}]
)
print(message.content[0].text)</code></pre>
</div>
<!-- cURL -->
<div>
<h3 class="font-semibold text-purple-700 mb-2 flex items-center gap-2">
<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24" aria-hidden="true"><path d="M8 5v14l11-7z"/></svg>
cURL
</h3>
<pre><code class="language-bash">curl -X POST "https://likhonsheikh-anthropic-compatible-api.hf.space/anthropic/v1/messages" \
-H "Content-Type: application/json" \
-H "x-api-key: any-key" \
-H "anthropic-version: 2023-06-01" \
-d '{
"model": "qwen2.5-coder-7b",
"max_tokens": 256,
"messages": [{"role": "user", "content": "Hello!"}]
}'</code></pre>
</div>
</div>
</div>
<!-- Interactive Playground -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
<h2 class="text-xl font-bold text-gray-800 mb-4">Try it Now</h2>
<div class="space-y-4">
<div class="flex gap-4 items-center">
<label class="text-sm font-medium text-gray-700" for="model-select">Model:</label>
<select id="model-select" class="px-4 py-2 border border-gray-200 rounded-lg focus:ring-2 focus:ring-purple-500">
<option value="qwen2.5-coder-7b">qwen2.5-coder-7b (Quality)</option>
<option value="qwen2.5-coder-1.5b">qwen2.5-coder-1.5b (3x Faster)</option>
</select>
<span id="speed-indicator" class="text-xs text-gray-500"></span>
</div>
<!-- Visually-hidden label: the placeholder alone is not an accessible name -->
<label class="sr-only" for="prompt-input">Prompt</label>
<textarea id="prompt-input" class="w-full p-4 border border-gray-200 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent" rows="3" placeholder="Enter your prompt here...">Hello! Can you write a simple Python function that calculates factorial?</textarea>
<div class="flex gap-4">
<!-- type="button" prevents accidental form submission semantics;
     handlers are global functions defined in the inline script below -->
<button type="button" onclick="sendMessage()" class="bg-purple-600 hover:bg-purple-700 text-white font-semibold py-2 px-6 rounded-lg transition">
Send Message
</button>
<button type="button" onclick="sendStreamingMessage()" class="bg-indigo-600 hover:bg-indigo-700 text-white font-semibold py-2 px-6 rounded-lg transition">
Stream Response
</button>
</div>
<!-- Live region: announce async responses to assistive technology -->
<div id="response-output" class="bg-gray-50 rounded-lg p-4 min-h-[100px] text-sm font-mono whitespace-pre-wrap hidden" aria-live="polite"></div>
<div id="timing-info" class="text-xs text-gray-500 hidden"></div>
</div>
</div>
<!-- Logs Viewer: panel filled by fetchLogs() on demand -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
<div class="flex items-center justify-between mb-4">
<h2 class="text-xl font-bold text-gray-800">Live Logs</h2>
<button type="button" onclick="fetchLogs()" class="text-purple-600 hover:text-purple-800 text-sm font-medium">Refresh</button>
</div>
<!-- Scrollable region: tabindex + role + label make it keyboard-scrollable -->
<div id="logs-output" class="bg-gray-900 text-green-400 rounded-lg p-4 h-64 overflow-y-auto text-xs font-mono" tabindex="0" role="region" aria-label="API logs">
Click "Refresh" to load logs...
</div>
</div>
<!-- API Endpoints Reference (static data table) -->
<div class="card rounded-xl p-6 shadow-lg">
<h2 class="text-xl font-bold text-gray-800 mb-4">API Endpoints</h2>
<div class="overflow-x-auto">
<table class="w-full text-sm">
<caption class="sr-only">Available API endpoints with their HTTP method and description</caption>
<thead class="bg-gray-50">
<tr>
<th class="px-4 py-3 text-left font-semibold text-gray-600" scope="col">Method</th>
<th class="px-4 py-3 text-left font-semibold text-gray-600" scope="col">Endpoint</th>
<th class="px-4 py-3 text-left font-semibold text-gray-600" scope="col">Description</th>
</tr>
</thead>
<tbody class="divide-y divide-gray-100">
<tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/</td><td class="px-4 py-3 text-gray-600">Health check with full status</td></tr>
<tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/health</td><td class="px-4 py-3 text-gray-600">Simple health check</td></tr>
<tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/logs</td><td class="px-4 py-3 text-gray-600">View API logs</td></tr>
<tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/queue/status</td><td class="px-4 py-3 text-gray-600">Request queue statistics</td></tr>
<tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/models/status</td><td class="px-4 py-3 text-gray-600">Loaded models info</td></tr>
<tr><td class="px-4 py-3"><span class="bg-blue-100 text-blue-700 px-2 py-1 rounded text-xs font-semibold">POST</span></td><td class="px-4 py-3 font-mono text-purple-600">/anthropic/v1/messages</td><td class="px-4 py-3 text-gray-600">Anthropic Messages API</td></tr>
<tr><td class="px-4 py-3"><span class="bg-blue-100 text-blue-700 px-2 py-1 rounded text-xs font-semibold">POST</span></td><td class="px-4 py-3 font-mono text-purple-600">/v1/chat/completions</td><td class="px-4 py-3 text-gray-600">OpenAI Chat API</td></tr>
<tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/anthropic/v1/models</td><td class="px-4 py-3 text-gray-600">List available models</td></tr>
</tbody>
</table>
</div>
</div>
<!-- Footer: semantic <footer> landmark instead of a bare div -->
<footer class="text-center mt-8 text-purple-100">
<p>Built with llama.cpp + FastAPI | Model: Qwen2.5-Coder-7B-Instruct (Q4_K_M)</p>
<p class="mt-2 text-sm opacity-75">Open source and self-hostable</p>
</footer>
</div>
<script>
// Syntax-highlight the static <pre><code> samples in the Quick Start section.
hljs.highlightAll();
// All API calls target the same origin that served this page.
const BASE_URL = window.location.origin;
// Poll /api/status and render the status cards and model list.
// API values are interpolated into innerHTML, so every dynamic value is
// HTML-escaped first — a buggy or hostile backend must not be able to
// inject markup into the page.
async function fetchStatus() {
const esc = (v) => String(v).replace(/[&<>"']/g, (c) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[c]));
try {
const res = await fetch(BASE_URL + '/api/status');
const data = await res.json();
document.getElementById('status-indicator').className = 'status-dot w-3 h-3 bg-green-400 rounded-full';
document.getElementById('api-status').innerHTML = `
<div class="font-semibold text-green-600">Online</div>
<div class="text-xs text-gray-500 mt-1">Version: ${esc(data.version || 'N/A')}</div>
`;
if (data.queue) {
document.getElementById('queue-status').innerHTML = `
<div>Active: <span class="font-semibold">${esc(data.queue.active_requests || 0)}</span></div>
<div>Queue: <span class="font-semibold">${esc(data.queue.queue_length || 0)}</span></div>
<div class="text-xs text-gray-500 mt-1">Total: ${esc(data.queue.stats?.total_requests || 0)}</div>
`;
}
if (data.cache) {
document.getElementById('cache-status').innerHTML = `
<div>Hit Rate: <span class="font-semibold">${esc(data.cache.hit_rate || '0%')}</span></div>
<div>Size: <span class="font-semibold">${esc(data.cache.size || 0)}/${esc(data.cache.max_size || 10)}</span></div>
`;
}
if (data.models) {
const modelsHtml = data.models.map(m => `
<div class="flex items-center justify-between p-3 bg-gray-50 rounded-lg">
<div>
<span class="font-semibold">${esc(m.id)}</span>
<span class="text-sm text-gray-500 ml-2">${esc(m.size)} (${esc(m.quantization)})</span>
</div>
<div class="flex items-center gap-2">
${m.loaded ? '<span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs">Loaded</span>' : '<span class="bg-gray-100 text-gray-600 px-2 py-1 rounded text-xs">Available</span>'}
${m.default ? '<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded text-xs">Default</span>' : ''}
</div>
</div>
`).join('');
document.getElementById('models-list').innerHTML = modelsHtml;
}
} catch (e) {
// Network failure or backend still building: show the offline state.
document.getElementById('status-indicator').className = 'status-dot w-3 h-3 bg-red-400 rounded-full';
document.getElementById('api-status').innerHTML = '<span class="text-red-600">Offline or Building</span>';
}
}
// Fire a non-streaming request at the Anthropic-compatible endpoint and
// show the first text block of the reply (or the raw JSON if the response
// has no text content, e.g. an error envelope).
async function sendMessage() {
const output = document.getElementById('response-output');
const timing = document.getElementById('timing-info');
const model = document.getElementById('model-select').value;
const userPrompt = document.getElementById('prompt-input').value;
output.classList.remove('hidden');
output.textContent = 'Sending...';
timing.classList.add('hidden');
const t0 = Date.now();
try {
const payload = {
model: model,
max_tokens: 1024,
messages: [{ role: 'user', content: userPrompt }]
};
const res = await fetch(`${BASE_URL}/anthropic/v1/messages`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-api-key': 'test-key',
'anthropic-version': '2023-06-01'
},
body: JSON.stringify(payload)
});
const data = await res.json();
const secs = ((Date.now() - t0) / 1000).toFixed(2);
output.textContent = data.content?.[0]?.text || JSON.stringify(data, null, 2);
timing.textContent = `Response time: ${secs}s | Model: ${model} | Tokens: ${data.usage?.output_tokens || 'N/A'}`;
timing.classList.remove('hidden');
} catch (e) {
output.textContent = 'Error: ' + e.message;
}
}
// Stream a response over SSE and append text deltas as they arrive.
// Fixes two chunk-boundary bugs in the original parser:
//  1. decode(value) without {stream:true} could corrupt a multi-byte UTF-8
//     character split across two network chunks;
//  2. a "data: ..." line split across chunks was parsed as broken JSON and
//     silently dropped by the try/catch, losing tokens. A carry-over buffer
//     now holds the trailing partial line until the next chunk completes it.
async function sendStreamingMessage() {
const prompt = document.getElementById('prompt-input').value;
const model = document.getElementById('model-select').value;
const output = document.getElementById('response-output');
const timing = document.getElementById('timing-info');
output.classList.remove('hidden');
output.textContent = '';
timing.classList.add('hidden');
const startTime = Date.now();
let tokenCount = 0;
try {
const res = await fetch(BASE_URL + '/anthropic/v1/messages', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-api-key': 'test-key',
'anthropic-version': '2023-06-01'
},
body: JSON.stringify({
model: model,
max_tokens: 1024,
stream: true,
messages: [{ role: 'user', content: prompt }]
})
});
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
// stream:true keeps split multi-byte sequences intact across chunks
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
// The last element may be an incomplete line; keep it for the next chunk.
buffer = lines.pop();
for (const line of lines) {
if (line.startsWith('data: ')) {
try {
const data = JSON.parse(line.slice(6));
if (data.delta?.text) {
output.textContent += data.delta.text;
tokenCount++;
}
} catch {}
}
}
}
const elapsedMs = Date.now() - startTime;
const elapsed = (elapsedMs / 1000).toFixed(2);
// Guard against a 0ms elapsed producing Infinity in the rate display.
const tokensPerSec = elapsedMs > 0 ? (tokenCount / (elapsedMs / 1000)).toFixed(1) : '0.0';
timing.textContent = `Time: ${elapsed}s | Model: ${model} | ~${tokenCount} tokens | ${tokensPerSec} tok/s`;
timing.classList.remove('hidden');
} catch (e) {
output.textContent = 'Error: ' + e.message;
}
}
// Fetch the latest 50 log lines and render them into the log viewer panel.
async function fetchLogs() {
const panel = document.getElementById('logs-output');
try {
const resp = await fetch(`${BASE_URL}/logs?lines=50`);
const body = await resp.json();
panel.textContent = body.logs || 'No logs available';
} catch (err) {
panel.textContent = 'Error loading logs: ' + err.message;
}
}
// Initial fetch
fetchStatus();
// Re-poll status/queue/cache/model info every 30 seconds.
setInterval(fetchStatus, 30000);
</script>
</body>
</html>