owlninjam committed on
Commit
cc22945
·
verified ·
1 Parent(s): 252854a

Create api.py

Browse files
Files changed (1) hide show
  1. api.py +269 -0
api.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Depends, status
2
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ from llama_cpp import Llama
6
+ import os
7
+ import uvicorn
8
+ from typing import Optional, List, Dict, Union, Literal
9
+ import time
10
+ import json
11
+ import uuid
12
+ from datetime import datetime
13
+
14
# Configuration
#
# API keys may be supplied via the API_KEYS environment variable as a
# comma-separated list; the hard-coded set below is kept only as a fallback
# so existing clients keep working.
# SECURITY NOTE(review): static keys committed to source (and echoed by the
# /v1 info endpoint) are unsafe for any real deployment — rotate them and
# prefer the environment variable.
_DEFAULT_API_KEYS = {
    "sk-adminkey02",
    "sk-testkey123",
    "sk-userkey456",
    "sk-demokey789",
}
VALID_API_KEYS = (
    {key.strip() for key in os.environ["API_KEYS"].split(",") if key.strip()}
    if os.environ.get("API_KEYS")
    else _DEFAULT_API_KEYS
)
21
+
22
# Module-level state: the llama.cpp handle is populated by load_model() at
# startup; the bearer scheme feeds verify_api_key() from the Authorization header.
llm = None
security = HTTPBearer()
25
+
26
# OpenAI-compatible request/response models
class Message(BaseModel):
    """A single chat message in the OpenAI chat format."""

    # Only the three standard OpenAI roles are accepted.
    role: Literal["system", "user", "assistant"]
    content: str
30
+
31
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible)."""

    model: str = "capybarahermes-2.5-mistral-7b"
    messages: List[Message]
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
    # n and stream are accepted for wire compatibility; the handler currently
    # produces a single, non-streamed completion regardless.
    n: Optional[int] = 1
    stream: Optional[bool] = False
    # Either one stop string or a list of stop strings.
    stop: Optional[Union[str, List[str]]] = None
40
+
41
class ChatCompletionChoice(BaseModel):
    """One generated completion inside a chat-completion response."""

    index: int
    message: Message
    finish_reason: Literal["stop", "length", "content_filter"]
45
+
46
class Usage(BaseModel):
    """Token accounting for a completion (whitespace-based approximation)."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
50
+
51
class ChatCompletionResponse(BaseModel):
    """Response body for POST /v1/chat/completions (OpenAI-compatible)."""

    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Usage
58
+
59
class Model(BaseModel):
    """A single entry in the GET /v1/models listing."""

    id: str
    object: str = "model"
    created: int
    owned_by: str
64
+
65
class ModelsResponse(BaseModel):
    """Response body for GET /v1/models."""

    object: str = "list"
    data: List[Model]
68
+
69
# Initialize FastAPI; interactive docs are served under /v1/ with the API.
app = FastAPI(
    title="CapybaraHermes OpenAI API",
    description="OpenAI-compatible API for CapybaraHermes-2.5-Mistral-7B",
    version="1.0.0",
    docs_url="/v1/docs",
    redoc_url="/v1/redoc",
)
77
+
78
# Add CORS middleware so browser clients on other origins can call the API.
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site send credentialed requests — tighten allow_origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
86
+
87
def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: validate the bearer token against the allow-list.

    Returns the key on success; raises HTTP 401 otherwise.
    """
    token = credentials.credentials
    if token in VALID_API_KEYS:
        return token
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid API key"
    )
95
+
96
def load_model():
    """Load the quantized GGUF model into the module-level ``llm`` handle.

    Raises:
        FileNotFoundError: if the model file is not present in the working dir.
        RuntimeError: if llama.cpp fails to initialize the model (original
            cause preserved via exception chaining).
    """
    global llm
    model_path = "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf"

    if not os.path.exists(model_path):
        # Specific exception type instead of the bare Exception the original
        # raised, so callers can distinguish "file missing" from load failures.
        raise FileNotFoundError(f"Model file {model_path} not found!")

    try:
        llm = Llama(
            model_path=model_path,
            n_ctx=4096,          # matches the context_length advertised by /v1
            n_threads=2,
            n_batch=512,
            verbose=False,
            use_mlock=True,      # keep model pages resident to avoid swapping
            n_gpu_layers=0,      # CPU-only inference
        )
        print("✅ Model loaded successfully!")
    except Exception as e:
        # Chain the cause (`from e`) so the underlying traceback is not lost,
        # unlike the original re-wrap into a bare Exception.
        raise RuntimeError(f"❌ Error loading model: {str(e)}") from e
117
+
118
def format_messages(messages: List[Message]) -> str:
    """Render the conversation as a ChatML prompt, ending with an open
    assistant turn for the model to complete."""
    turns = [
        f"<|im_start|>{msg.role}\n{msg.content}\n<|im_end|>\n"
        for msg in messages
    ]
    turns.append("<|im_start|>assistant\n")
    return "".join(turns)
127
+
128
def count_tokens_rough(text: str) -> int:
    """Approximate the token count as the number of whitespace-separated
    words — cheap, and close enough for usage reporting."""
    words = text.split()
    return len(words)
131
+
132
# NOTE(review): FastAPI has deprecated on_event in favor of lifespan handlers;
# kept as-is to avoid restructuring app creation.
@app.on_event("startup")
async def startup_event():
    """Load the GGUF model once when the server starts.

    Failures are logged and re-raised so the server refuses to come up
    without a working model.
    """
    try:
        print("🚀 Starting CapybaraHermes API server...")
        load_model()
    except Exception as e:
        print(f"💥 Failed to load model: {e}")
        raise
141
+
142
# API endpoints with authentication
@app.get("/v1/models", response_model=ModelsResponse)
async def list_models(api_key: str = Depends(verify_api_key)):
    """List the (single) model served by this API."""
    model_entry = Model(
        id="capybarahermes-2.5-mistral-7b",
        created=int(datetime.now().timestamp()),
        owned_by="local",
    )
    return ModelsResponse(data=[model_entry])
155
+
156
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(
    request: ChatCompletionRequest,
    api_key: str = Depends(verify_api_key)
):
    """Create a (non-streaming) chat completion in the OpenAI response format.

    Raises HTTP 503 while the model is not loaded and HTTP 500 on
    generation errors.
    """
    if llm is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        # Format messages into a ChatML prompt.
        prompt = format_messages(request.messages)
        prompt_tokens = count_tokens_rough(prompt)

        # BUG FIX: request.stop may be a single string (its declared type is
        # Union[str, List[str]]); the original `list + (request.stop or [])`
        # raised TypeError for the str case. Normalize to a list first.
        stop_sequences = ["<|im_end|>", "<|im_start|>"]
        if request.stop:
            if isinstance(request.stop, str):
                stop_sequences.append(request.stop)
            else:
                stop_sequences.extend(request.stop)

        start_time = time.time()

        # Generate response
        response = llm(
            prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            stop=stop_sequences,
            echo=False
        )

        generation_time = time.time() - start_time

        # Extract response text and rough usage numbers.
        choice = response['choices'][0]
        response_text = choice['text'].strip()
        completion_tokens = count_tokens_rough(response_text)
        tokens_per_second = completion_tokens / generation_time if generation_time > 0 else 0

        print(f"⚡ Generated {completion_tokens} tokens in {generation_time:.2f}s ({tokens_per_second:.2f} tok/s)")

        # BUG FIX: report the backend's finish reason instead of always
        # claiming "stop" — llama.cpp reports "length" when max_tokens is hit.
        # Fall back to "stop" for values outside the response model's Literal.
        raw_reason = choice.get('finish_reason')
        finish_reason = raw_reason if raw_reason in ("stop", "length", "content_filter") else "stop"

        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request.model,
            choices=[
                ChatCompletionChoice(
                    index=0,
                    message=Message(role="assistant", content=response_text),
                    finish_reason=finish_reason
                )
            ],
            usage=Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens
            )
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
212
+
213
+ @app.get("/v1/health")
214
+ async def health_check():
215
+ """Health check (no auth required)"""
216
+ if llm is None:
217
+ raise HTTPException(status_code=503, detail="Model not loaded")
218
+ return {
219
+ "status": "healthy",
220
+ "model_loaded": True,
221
+ "timestamp": datetime.now().isoformat(),
222
+ "model": "capybarahermes-2.5-mistral-7b"
223
+ }
224
+
225
+ @app.get("/v1")
226
+ async def api_info():
227
+ """API information"""
228
+ return {
229
+ "message": "🦙 CapybaraHermes OpenAI Compatible API",
230
+ "model": "CapybaraHermes-2.5-Mistral-7B (Q5_K_M quantized)",
231
+ "endpoints": {
232
+ "chat_completions": "/v1/chat/completions",
233
+ "models": "/v1/models",
234
+ "health": "/v1/health",
235
+ "docs": "/v1/docs"
236
+ },
237
+ "authentication": {
238
+ "required": True,
239
+ "type": "Bearer token",
240
+ "header": "Authorization: Bearer sk-your-api-key",
241
+ "valid_keys": ["sk-adminkey02", "sk-testkey123", "sk-userkey456", "sk-demokey789"]
242
+ },
243
+ "usage": {
244
+ "sdk": "pip install openai",
245
+ "base_url": "https://your-username-your-space.hf.space/v1",
246
+ "example": "client = OpenAI(base_url='https://your-space.hf.space/v1', api_key='sk-adminkey02')"
247
+ },
248
+ "performance": {
249
+ "expected_speed": "2-8 tokens/second (CPU)",
250
+ "context_length": 4096,
251
+ "quantization": "Q5_K_M"
252
+ }
253
+ }
254
+
255
# Public endpoint for basic info (no auth)
@app.get("/api")
async def public_api_info():
    """Unauthenticated service overview pointing at the real endpoints."""
    info = {
        "service": "CapybaraHermes API",
        "status": "running",
        "endpoints": "/v1/",
        "docs": "/v1/docs",
        "chat_ui": "/",
        "authentication": "API key required for /v1/* endpoints",
    }
    return info
267
+
268
if __name__ == "__main__":
    # Bind on all interfaces; intended for containerized deployment.
    uvicorn.run(app, host="0.0.0.0", port=8000)