Matrix Agent
v3.2: Speed optimizations - OpenBLAS, dual models (7B/1.5B), model selector, timing display
1b50d66
<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Anthropic-Compatible API Dashboard</title>
  <!-- Tailwind Play CDN: generates the utility classes used throughout the page at runtime. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
  <!-- Loaded synchronously (no defer/async): the inline script at the end of <body> calls hljs.highlightAll() immediately. -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
  <style>
    /* Purple page background. */
    .gradient-bg { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); }
    /* Translucent white panel used for every dashboard section. */
    .card { background: rgba(255,255,255,0.95); backdrop-filter: blur(10px); }
    pre code { font-size: 0.85rem; }
    /* Slow opacity pulse for the API status dot. */
    .status-dot { animation: pulse 2s infinite; }
    @keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.5; } }
  </style>
</head>
| <body class="min-h-screen gradient-bg"> | |
| <div class="container mx-auto px-4 py-8"> | |
<!-- Header: page title and tagline, exposed as the banner landmark -->
<header class="text-center mb-8">
  <h1 class="text-4xl font-bold text-white mb-2">Anthropic-Compatible API</h1>
  <p class="text-purple-100 text-lg">Self-hosted Claude-compatible endpoint powered by Qwen2.5-Coder-7B</p>
</header>
<!-- Status Cards: the three text containers (#api-status, #queue-status, #cache-status) are filled in by fetchStatus() -->
<div class="grid md:grid-cols-3 gap-6 mb-8">
  <div class="card rounded-xl p-6 shadow-lg">
    <div class="flex items-center justify-between mb-4">
      <h3 class="text-lg font-semibold text-gray-800">API Status</h3>
      <!-- Colour-only indicator: the same state is spelled out in #api-status, so it is hidden from assistive tech. -->
      <span id="status-indicator" class="status-dot w-3 h-3 bg-yellow-400 rounded-full" aria-hidden="true"></span>
    </div>
    <div id="api-status" class="text-sm text-gray-600">Checking...</div>
  </div>
  <div class="card rounded-xl p-6 shadow-lg">
    <div class="flex items-center justify-between mb-4">
      <h3 class="text-lg font-semibold text-gray-800">Queue Status</h3>
      <!-- Decorative icon -->
      <svg class="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 6h16M4 12h16M4 18h16"/>
      </svg>
    </div>
    <div id="queue-status" class="text-sm text-gray-600">Loading...</div>
  </div>
  <div class="card rounded-xl p-6 shadow-lg">
    <div class="flex items-center justify-between mb-4">
      <h3 class="text-lg font-semibold text-gray-800">Cache Stats</h3>
      <!-- Decorative icon -->
      <svg class="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z"/>
      </svg>
    </div>
    <div id="cache-status" class="text-sm text-gray-600">Loading...</div>
  </div>
</div>
<!-- Models: #models-list holds a placeholder until fetchStatus() replaces its contents -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
  <h3 class="text-xl font-bold text-gray-800 mb-4">Available Models</h3>
  <p class="text-sm text-gray-500 mb-4">Choose based on your needs: 7B for quality, 1.5B for speed (3x faster)</p>
  <div id="models-list" class="space-y-3">Loading models...</div>
</div>
<!-- Quick Start Guide: copy-paste samples, highlighted by highlight.js.
     Sample text inside <pre><code> starts at column 0 because whitespace there is rendered verbatim. -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
  <h3 class="text-xl font-bold text-gray-800 mb-4">Quick Start</h3>
  <div class="space-y-6">
    <!-- Claude Code -->
    <div>
      <h4 class="font-semibold text-purple-700 mb-2 flex items-center gap-2">
        <svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg>
        Claude Code CLI
      </h4>
      <pre><code class="language-bash"># Set environment variables
export ANTHROPIC_API_KEY="any-key"
export ANTHROPIC_BASE_URL="https://likhonsheikh-anthropic-compatible-api.hf.space/anthropic"

# Run Claude Code with custom model
claude --model qwen2.5-coder-7b "Write a hello world in Python"</code></pre>
    </div>
    <!-- Python SDK -->
    <div>
      <h4 class="font-semibold text-purple-700 mb-2 flex items-center gap-2">
        <svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2z"/></svg>
        Python SDK
      </h4>
      <pre><code class="language-python">import anthropic

client = anthropic.Anthropic(
    api_key="any-key",
    base_url="https://likhonsheikh-anthropic-compatible-api.hf.space/anthropic"
)

message = client.messages.create(
    model="qwen2.5-coder-7b",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello!"}]
)
print(message.content[0].text)</code></pre>
    </div>
    <!-- cURL -->
    <div>
      <h4 class="font-semibold text-purple-700 mb-2 flex items-center gap-2">
        <svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24" aria-hidden="true"><path d="M8 5v14l11-7z"/></svg>
        cURL
      </h4>
      <pre><code class="language-bash">curl -X POST "https://likhonsheikh-anthropic-compatible-api.hf.space/anthropic/v1/messages" \
  -H "Content-Type: application/json" \
  -H "x-api-key: any-key" \
  -H "anthropic-version: 2023-06-01" \
  -d '{
    "model": "qwen2.5-coder-7b",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "Hello!"}]
  }'</code></pre>
    </div>
  </div>
</div>
<!-- Interactive Playground: sendMessage() / sendStreamingMessage() read #model-select and #prompt-input,
     then write to #response-output and #timing-info -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
  <h3 class="text-xl font-bold text-gray-800 mb-4">Try it Now</h3>
  <div class="space-y-4">
    <div class="flex gap-4 items-center">
      <!-- "for" ties the visible label to the select so it gets an accessible name and click-to-focus. -->
      <label for="model-select" class="text-sm font-medium text-gray-700">Model:</label>
      <select id="model-select" class="px-4 py-2 border border-gray-200 rounded-lg focus:ring-2 focus:ring-purple-500">
        <option value="qwen2.5-coder-7b">qwen2.5-coder-7b (Quality)</option>
        <option value="qwen2.5-coder-1.5b">qwen2.5-coder-1.5b (3x Faster)</option>
      </select>
      <span id="speed-indicator" class="text-xs text-gray-500"></span>
    </div>
    <!-- The placeholder is not a label (and is hidden while the default text is present), so name the field explicitly. -->
    <textarea id="prompt-input" class="w-full p-4 border border-gray-200 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent" rows="3" placeholder="Enter your prompt here..." aria-label="Prompt">Hello! Can you write a simple Python function that calculates factorial?</textarea>
    <div class="flex gap-4">
      <!-- Explicit type="button": the default type is "submit", which would submit any form these are later placed in. -->
      <button type="button" onclick="sendMessage()" class="bg-purple-600 hover:bg-purple-700 text-white font-semibold py-2 px-6 rounded-lg transition">
        Send Message
      </button>
      <button type="button" onclick="sendStreamingMessage()" class="bg-indigo-600 hover:bg-indigo-700 text-white font-semibold py-2 px-6 rounded-lg transition">
        Stream Response
      </button>
    </div>
    <!-- Both start hidden; the send functions toggle the "hidden" class. -->
    <div id="response-output" class="bg-gray-50 rounded-lg p-4 min-h-[100px] text-sm font-mono whitespace-pre-wrap hidden"></div>
    <div id="timing-info" class="text-xs text-gray-500 hidden"></div>
  </div>
</div>
<!-- Logs Viewer: fetchLogs() writes the fetched log text into #logs-output via textContent -->
<div class="card rounded-xl p-6 shadow-lg mb-8">
  <div class="flex items-center justify-between mb-4">
    <h3 class="text-xl font-bold text-gray-800">Live Logs</h3>
    <button type="button" onclick="fetchLogs()" class="text-purple-600 hover:text-purple-800 text-sm font-medium">Refresh</button>
  </div>
  <!-- whitespace-pre-wrap keeps line breaks in the log text; without it they collapse into one paragraph.
       The placeholder sits flush against the tags so pre-wrap does not render stray leading/trailing blank lines. -->
  <div id="logs-output" class="bg-gray-900 text-green-400 rounded-lg p-4 h-64 overflow-y-auto text-xs font-mono whitespace-pre-wrap">Click "Refresh" to load logs...</div>
</div>
<!-- API Endpoints Reference: static table of the routes this server exposes -->
<div class="card rounded-xl p-6 shadow-lg">
  <h3 class="text-xl font-bold text-gray-800 mb-4">API Endpoints</h3>
  <div class="overflow-x-auto">
    <table class="w-full text-sm">
      <!-- Visually hidden caption gives the table an accessible name without changing the layout. -->
      <caption class="sr-only">HTTP endpoints exposed by this API</caption>
      <thead class="bg-gray-50">
        <tr>
          <th scope="col" class="px-4 py-3 text-left font-semibold text-gray-600">Method</th>
          <th scope="col" class="px-4 py-3 text-left font-semibold text-gray-600">Endpoint</th>
          <th scope="col" class="px-4 py-3 text-left font-semibold text-gray-600">Description</th>
        </tr>
      </thead>
      <tbody class="divide-y divide-gray-100">
        <tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/</td><td class="px-4 py-3 text-gray-600">Health check with full status</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/health</td><td class="px-4 py-3 text-gray-600">Simple health check</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/logs</td><td class="px-4 py-3 text-gray-600">View API logs</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/queue/status</td><td class="px-4 py-3 text-gray-600">Request queue statistics</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/models/status</td><td class="px-4 py-3 text-gray-600">Loaded models info</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-blue-100 text-blue-700 px-2 py-1 rounded text-xs font-semibold">POST</span></td><td class="px-4 py-3 font-mono text-purple-600">/anthropic/v1/messages</td><td class="px-4 py-3 text-gray-600">Anthropic Messages API</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-blue-100 text-blue-700 px-2 py-1 rounded text-xs font-semibold">POST</span></td><td class="px-4 py-3 font-mono text-purple-600">/v1/chat/completions</td><td class="px-4 py-3 text-gray-600">OpenAI Chat API</td></tr>
        <tr><td class="px-4 py-3"><span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs font-semibold">GET</span></td><td class="px-4 py-3 font-mono text-purple-600">/anthropic/v1/models</td><td class="px-4 py-3 text-gray-600">List available models</td></tr>
      </tbody>
    </table>
  </div>
</div>
<!-- Footer: build/stack credits, exposed as the contentinfo landmark -->
<footer class="text-center mt-8 text-purple-100">
  <p>Built with llama.cpp + FastAPI | Model: Qwen2.5-Coder-7B-Instruct (Q4_K_M)</p>
  <p class="mt-2 text-sm opacity-75">Open source and self-hostable</p>
</footer>
| </div> | |
| <script> | |
// Syntax-highlight the static code samples on the page.
hljs.highlightAll();

// Every API request targets the origin that served this dashboard.
const { origin: BASE_URL } = window.location;
/**
 * Fetch `/api/status` and refresh the API-status, queue, cache and models panels.
 *
 * On success the status dot turns green and each panel whose key is present in
 * the response (`queue`, `cache`, `models`) is re-rendered. On any failure
 * (network error, non-2xx status, invalid JSON) the dot turns red and the
 * API-status panel shows "Offline or Building"; the other panels are left as-is.
 *
 * @returns {Promise<void>} Resolves once the DOM has been updated; never rejects.
 */
async function fetchStatus() {
  // Values from the response are interpolated into innerHTML below, so escape
  // them. String() first keeps numbers / undefined rendering as plain text.
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  try {
    const res = await fetch(BASE_URL + '/api/status');
    // fetch() only rejects on network failure; an HTTP error with a JSON body
    // must not be reported as "Online".
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`);
    }
    const data = await res.json();

    document.getElementById('status-indicator').className = 'status-dot w-3 h-3 bg-green-400 rounded-full';
    document.getElementById('api-status').innerHTML = `
      <div class="font-semibold text-green-600">Online</div>
      <div class="text-xs text-gray-500 mt-1">Version: ${escapeHtml(data.version || 'N/A')}</div>
    `;

    if (data.queue) {
      document.getElementById('queue-status').innerHTML = `
        <div>Active: <span class="font-semibold">${escapeHtml(data.queue.active_requests || 0)}</span></div>
        <div>Queue: <span class="font-semibold">${escapeHtml(data.queue.queue_length || 0)}</span></div>
        <div class="text-xs text-gray-500 mt-1">Total: ${escapeHtml(data.queue.stats?.total_requests || 0)}</div>
      `;
    }

    if (data.cache) {
      document.getElementById('cache-status').innerHTML = `
        <div>Hit Rate: <span class="font-semibold">${escapeHtml(data.cache.hit_rate || '0%')}</span></div>
        <div>Size: <span class="font-semibold">${escapeHtml(data.cache.size || 0)}/${escapeHtml(data.cache.max_size || 10)}</span></div>
      `;
    }

    // Only render when models really is a list; a non-array value would make
    // .map() throw and flip the page to "Offline" after "Online" was shown.
    if (Array.isArray(data.models)) {
      const modelsHtml = data.models.map((m) => `
        <div class="flex items-center justify-between p-3 bg-gray-50 rounded-lg">
          <div>
            <span class="font-semibold">${escapeHtml(m.id)}</span>
            <span class="text-sm text-gray-500 ml-2">${escapeHtml(m.size)} (${escapeHtml(m.quantization)})</span>
          </div>
          <div class="flex items-center gap-2">
            ${m.loaded ? '<span class="bg-green-100 text-green-700 px-2 py-1 rounded text-xs">Loaded</span>' : '<span class="bg-gray-100 text-gray-600 px-2 py-1 rounded text-xs">Available</span>'}
            ${m.default ? '<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded text-xs">Default</span>' : ''}
          </div>
        </div>
      `).join('');
      document.getElementById('models-list').innerHTML = modelsHtml;
    }
  } catch (e) {
    document.getElementById('status-indicator').className = 'status-dot w-3 h-3 bg-red-400 rounded-full';
    document.getElementById('api-status').innerHTML = '<span class="text-red-600">Offline or Building</span>';
  }
}
/**
 * Send the playground prompt as a single (non-streaming) Messages request and
 * show the reply plus timing information.
 *
 * Reads the prompt from `#prompt-input` and the model from `#model-select`;
 * writes the reply (or an error message) to `#response-output` and, on
 * success, a summary line to `#timing-info`.
 *
 * @returns {Promise<void>} Resolves once the DOM has been updated; never rejects.
 */
async function sendMessage() {
  const prompt = document.getElementById('prompt-input').value;
  const model = document.getElementById('model-select').value;
  const output = document.getElementById('response-output');
  const timing = document.getElementById('timing-info');

  output.classList.remove('hidden');
  output.textContent = 'Sending...';
  timing.classList.add('hidden');

  const startTime = Date.now();
  try {
    const res = await fetch(BASE_URL + '/anthropic/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test-key',
        'anthropic-version': '2023-06-01'
      },
      body: JSON.stringify({
        model: model,
        max_tokens: 1024,
        messages: [{ role: 'user', content: prompt }]
      })
    });

    // Read as text first so a non-JSON error page (e.g. from a proxy) yields a
    // message with the HTTP status instead of a bare JSON syntax error.
    const raw = await res.text();
    let data;
    try {
      data = JSON.parse(raw);
    } catch (parseError) {
      throw new Error(`HTTP ${res.status}: response was not valid JSON`);
    }

    if (!res.ok) {
      // NOTE(review): assumes errors carry an { error: { message } } body;
      // anything else falls back to the full JSON dump.
      const detail = data?.error?.message || JSON.stringify(data, null, 2);
      throw new Error(`HTTP ${res.status}: ${detail}`);
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(2);
    // Fall back to the raw JSON when the response has no text block to show.
    output.textContent = data?.content?.[0]?.text || JSON.stringify(data, null, 2);
    // `??` rather than `||` so a genuine count of 0 is displayed instead of "N/A".
    timing.textContent = `Response time: ${elapsed}s | Model: ${model} | Tokens: ${data?.usage?.output_tokens ?? 'N/A'}`;
    timing.classList.remove('hidden');
  } catch (e) {
    output.textContent = 'Error: ' + e.message;
  }
}
/**
 * Send the playground prompt with `stream: true` and append text deltas to
 * `#response-output` as they arrive, then show timing in `#timing-info`.
 *
 * The response body is read as a server-sent-event stream: each `data:` line
 * is parsed as JSON and, when it carries `delta.text`, that text is appended.
 * The displayed token count is the number of text deltas received, which is
 * only an approximation of real tokens (hence the "~" in the summary).
 *
 * @returns {Promise<void>} Resolves once the stream has ended and the DOM has
 *   been updated; never rejects (errors are written to the output element).
 */
async function sendStreamingMessage() {
  const prompt = document.getElementById('prompt-input').value;
  const model = document.getElementById('model-select').value;
  const output = document.getElementById('response-output');
  const timing = document.getElementById('timing-info');

  output.classList.remove('hidden');
  output.textContent = '';
  timing.classList.add('hidden');

  const startTime = Date.now();
  let tokenCount = 0;

  // Handle one complete line of the event stream.
  const handleLine = (line) => {
    // The SSE format makes the space after "data:" optional.
    if (!line.startsWith('data:')) return;
    const payload = line.slice(5).trim();
    if (!payload) return;

    let event;
    try {
      event = JSON.parse(payload);
    } catch (parseError) {
      // Best-effort: skip lines that are not JSON, but leave a trace for debugging.
      console.warn('Skipping unparseable stream line:', payload);
      return;
    }
    if (event?.delta?.text) {
      output.textContent += event.delta.text;
      tokenCount++;
    }
  };

  try {
    const res = await fetch(BASE_URL + '/anthropic/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test-key',
        'anthropic-version': '2023-06-01'
      },
      body: JSON.stringify({
        model: model,
        max_tokens: 1024,
        stream: true,
        messages: [{ role: 'user', content: prompt }]
      })
    });

    // An error response is not an event stream; surface it instead of showing
    // an empty output with "0 tokens".
    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`HTTP ${res.status}${detail ? ': ' + detail : ''}`);
    }
    if (!res.body) {
      throw new Error('Response has no readable body');
    }

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    // Network chunks do not align with line boundaries, so keep the trailing
    // partial line here until the rest of it arrives.
    let buffer = '';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // stream: true keeps a multi-byte UTF-8 character that straddles two
      // chunks intact instead of decoding each half as U+FFFD.
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split(/\r?\n/);
      buffer = lines.pop();
      lines.forEach(handleLine);
    }

    // Flush any bytes still held by the decoder and a final unterminated line.
    buffer += decoder.decode();
    if (buffer) handleLine(buffer);

    const elapsedSeconds = (Date.now() - startTime) / 1000;
    const elapsed = elapsedSeconds.toFixed(2);
    // Guard against a zero duration so the rate never renders as Infinity/NaN.
    const tokensPerSec = elapsedSeconds > 0 ? (tokenCount / elapsedSeconds).toFixed(1) : '0.0';
    timing.textContent = `Time: ${elapsed}s | Model: ${model} | ~${tokenCount} tokens | ${tokensPerSec} tok/s`;
    timing.classList.remove('hidden');
  } catch (e) {
    output.textContent = 'Error: ' + e.message;
  }
}
/**
 * Fetch the last 50 log lines from `/logs` and display them in `#logs-output`.
 *
 * Shows "No logs available" when the response has no `logs` value, and an
 * "Error loading logs: …" message on network failure, a non-2xx status or
 * invalid JSON.
 *
 * @returns {Promise<void>} Resolves once the DOM has been updated; never rejects.
 */
async function fetchLogs() {
  const logsOutput = document.getElementById('logs-output');
  try {
    const res = await fetch(BASE_URL + '/logs?lines=50');
    // Without this, an error JSON body (which has no `logs` key) would be
    // misreported as "No logs available".
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`);
    }
    const data = await res.json();
    // NOTE(review): the shape of `logs` is not visible here. A string is shown
    // as-is; if it is a list of lines, join with newlines rather than letting
    // textContent stringify the array with commas.
    const logs = Array.isArray(data.logs) ? data.logs.join('\n') : data.logs;
    logsOutput.textContent = logs || 'No logs available';
  } catch (e) {
    logsOutput.textContent = 'Error loading logs: ' + e.message;
  }
}
// Populate the dashboard immediately, then re-poll the status every 30 seconds.
fetchStatus();
setInterval(fetchStatus, 30 * 1000);
| </script> | |
| </body> | |
| </html> | |