vaibhavlakshmi's picture
Upload folder using huggingface_hub
3930c05 verified
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
import uvicorn
import time
from .model_manager import OVModelManager
from .gguf_manager import OVGGUFManager
# FastAPI application object; the startup hook and the chat-completions route
# defined in this module are registered on it.
app = FastAPI(title="OpenVinayaka API", version="1.0")
# Global Model Instance
# Shared model manager for the whole process. It stays None until a model is
# loaded, either lazily by the first chat-completions request or up front by
# start_server() when a model name is given.
model_instance = None
class ChatMessage(BaseModel):
    """One message of a chat conversation: a speaker role and its text."""

    # Speaker label; the handler writes it verbatim as the "<role>: " prefix
    # of the message's line when building the prompt.
    role: str
    # Text of the message.
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body accepted by POST /v1/chat/completions."""

    # Model identifier. A value ending in ".gguf" is loaded through
    # OVGGUFManager; anything else is loaded through OVModelManager.
    model: str
    # Conversation messages; the handler renders them into the prompt in
    # list order.
    messages: List[ChatMessage]
    # Defaults to 0.7.
    # NOTE(review): the handler in this file never forwards this value to
    # generate() -- confirm whether that is intentional.
    temperature: Optional[float] = 0.7
    # Forwarded to generate() as max_new_tokens; defaults to 100.
    max_tokens: Optional[int] = 100
class ChatCompletionResponse(BaseModel):
    """Response body returned by POST /v1/chat/completions."""

    # Identifier string for this completion.
    id: str
    # Object type tag; defaults to "chat.completion".
    object: str = "chat.completion"
    # Integer timestamp; the handler fills it from int(time.time()).
    created: int
    # Model name; the handler echoes request.model here.
    model: str
    # List of choice dicts; the handler emits a single entry with the keys
    # "index", "message" and "finish_reason".
    choices: List[dict]
    # Dict with "prompt_tokens", "completion_tokens" and "total_tokens"; the
    # handler currently fills these with character counts, not token counts.
    usage: dict
@app.on_event("startup")
async def startup_event():
    """Print a one-line banner when the application starts up."""
    startup_banner = "🚀 OpenVinayaka API Server Started"
    print(startup_banner)
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest):
    """Answer a chat-completion request using the shared model instance.

    If no model has been loaded yet, the model named in ``request.model`` is
    loaded first (GGUF manager for names ending in ``.gguf``, HF manager with
    OV hooks otherwise). The messages are rendered as ``"<role>: <content>"``
    lines followed by a trailing ``"assistant:"`` cue, and the generated text
    is returned as a single choice.

    Args:
        request: Parsed request body (model name, messages, max_tokens).

    Returns:
        A ChatCompletionResponse with one assistant message and mock usage
        figures based on character counts.
    """
    # Local import: uuid is not among this module's top-level imports.
    import uuid

    global model_instance

    # Lazy Load Model if needed
    if model_instance is None:
        # Build and fully initialise the manager in a local variable first.
        # Publishing it to the global only after every setup step succeeded
        # means a failure (e.g. while attaching hooks) cannot leave a
        # half-initialised model behind for later requests to reuse.
        # Check if it's a GGUF file or HF model
        if request.model.endswith(".gguf"):
            print(f"Loading GGUF Model: {request.model}")
            loaded_model = OVGGUFManager(request.model)
        else:
            print(f"Loading HF Model: {request.model}")
            loaded_model = OVModelManager(request.model)
            # NOTE(review): hooks are attached for HF models only -- confirm
            # the GGUF manager needs no equivalent step.
            loaded_model.attach_ov_hooks()
        model_instance = loaded_model
    # NOTE(review): once a model is loaded, request.model no longer selects
    # which model answers; it is only echoed back in the response.
    # NOTE(review): loading and generation are synchronous calls inside an
    # async handler, so they occupy the event loop while they run.

    # Format Prompt
    rendered_turns = [f"{msg.role}: {msg.content}\n" for msg in request.messages]
    prompt = "".join(rendered_turns) + "assistant:"

    # Generate
    response_text = model_instance.generate(prompt, max_new_tokens=request.max_tokens)

    # Mock Usage: character counts stand in for real token counts.
    prompt_size = len(prompt)
    completion_size = len(response_text)
    usage = {
        "prompt_tokens": prompt_size,
        "completion_tokens": completion_size,
        "total_tokens": prompt_size + completion_size,
    }

    return ChatCompletionResponse(
        # A random suffix keeps ids distinct even for requests that arrive
        # within the same second.
        id=f"chatcmpl-{uuid.uuid4().hex}",
        created=int(time.time()),
        model=request.model,
        choices=[{
            "index": 0,
            "message": {"role": "assistant", "content": response_text},
            "finish_reason": "stop"
        }],
        usage=usage
    )
def start_server(host="0.0.0.0", port=8000, model=None):
    """Optionally pre-load a model, then serve the app with uvicorn.

    Args:
        host: Interface address passed to uvicorn.
        port: TCP port passed to uvicorn.
        model: Model name or path to load before serving. A falsy value
            skips pre-loading, leaving the load to the first request.
    """
    global model_instance

    # Pre-load if specified
    if model:
        wants_hf_manager = not model.endswith(".gguf")
        if wants_hf_manager:
            model_instance = OVModelManager(model)
            model_instance.attach_ov_hooks()
        else:
            model_instance = OVGGUFManager(model)

    uvicorn.run(app, host=host, port=port)