| from fastapi import FastAPI, HTTPException |
| from pydantic import BaseModel |
| from typing import List, Optional |
| import uvicorn |
| import time |
| from .model_manager import OVModelManager |
| from .gguf_manager import OVGGUFManager |
|
|
# ASGI application object; the route and event decorators in this module
# register their handlers on it.
app = FastAPI(
    title="OpenVinayaka API",
    version="1.0",
)

# Process-wide model manager. It starts out empty and is filled either by
# ``start_server`` (when a model is given up front) or by the first
# chat-completion request that arrives.
model_instance = None
|
|
class ChatMessage(BaseModel):
    """One turn of a chat conversation as sent by the client."""

    # Speaker label; it is copied verbatim into the prompt as "<role>: ".
    role: str
    # Text of the turn.
    content: str
|
|
class ChatCompletionRequest(BaseModel):
    """Request body accepted by ``POST /v1/chat/completions``."""

    # Model identifier; a name ending in ".gguf" selects the GGUF manager.
    model: str
    # Conversation so far, in order.
    messages: List[ChatMessage]
    # Sampling temperature requested by the client (defaults to 0.7).
    temperature: Optional[float] = 0.7
    # Upper bound on newly generated tokens (defaults to 100).
    max_tokens: Optional[int] = 100
|
|
class ChatCompletionResponse(BaseModel):
    """Response body returned by ``POST /v1/chat/completions``."""

    # Identifier of this completion.
    id: str
    # Object type tag; always "chat.completion" unless overridden.
    object: str = "chat.completion"
    # Creation time as integer seconds since the Unix epoch.
    created: int
    # Model name echoed back to the client.
    model: str
    # Generated alternatives; each entry is a plain dict.
    choices: List[dict]
    # Size accounting for the prompt and the completion.
    usage: dict
|
|
@app.on_event("startup")
async def startup_event() -> None:
    """Print a banner once the application has started."""
    banner = "🚀 OpenVinayaka API Server Started"
    print(banner)
|
|
def _load_model(model_name: str):
    """Build a fully initialised model manager for *model_name*.

    Names ending in ``.gguf`` are handled by ``OVGGUFManager``. Every other
    name is handled by ``OVModelManager``, whose OV hooks are attached before
    the manager is returned, so callers never see a half-initialised object.
    """
    if model_name.endswith(".gguf"):
        print(f"Loading GGUF Model: {model_name}")
        return OVGGUFManager(model_name)

    print(f"Loading HF Model: {model_name}")
    manager = OVModelManager(model_name)
    manager.attach_ov_hooks()
    return manager


def _build_prompt(messages: List[ChatMessage]) -> str:
    """Flatten *messages* into ``role: content`` lines ending with the assistant cue."""
    turns = [f"{msg.role}: {msg.content}\n" for msg in messages]
    return "".join(turns) + "assistant:"


@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest):
    """Generate one assistant reply for the given conversation.

    The model is loaded lazily: if no model has been loaded yet, the one named
    in ``request.model`` is loaded and kept for the lifetime of the process.
    Later requests reuse that instance even when they name a different model;
    the response still echoes ``request.model``.

    ``request.temperature`` is accepted but not forwarded to the model.

    Args:
        request: Validated chat-completion request body.

    Returns:
        A ``ChatCompletionResponse`` with a single choice whose
        ``finish_reason`` is always ``"stop"``.

    Raises:
        HTTPException: 400 when ``messages`` is empty; 500 when the model
            cannot be loaded.
    """
    # Function-scope import keeps this block self-contained.
    import uuid

    global model_instance

    if not request.messages:
        raise HTTPException(
            status_code=400,
            detail="'messages' must contain at least one message",
        )

    if model_instance is None:
        try:
            # The global is only assigned once loading (including hook
            # attachment) has succeeded, so a failed load cannot leave an
            # un-hooked model behind for subsequent requests.
            model_instance = _load_model(request.model)
        except Exception as exc:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to load model '{request.model}': {exc}",
            ) from exc

    prompt = _build_prompt(request.messages)
    response_text = model_instance.generate(
        prompt, max_new_tokens=request.max_tokens
    )

    # NOTE: these are character counts, not tokenizer token counts; no
    # tokenizer is available at this layer.
    prompt_size = len(prompt)
    completion_size = len(response_text)
    usage = {
        "prompt_tokens": prompt_size,
        "completion_tokens": completion_size,
        "total_tokens": prompt_size + completion_size,
    }

    return ChatCompletionResponse(
        # A random suffix keeps ids unique even for requests served within
        # the same second.
        id=f"chatcmpl-{uuid.uuid4().hex}",
        created=int(time.time()),
        model=request.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "assistant", "content": response_text},
                "finish_reason": "stop",
            }
        ],
        usage=usage,
    )
|
|
def start_server(
    host: str = "0.0.0.0",
    port: int = 8000,
    model: Optional[str] = None,
) -> None:
    """Optionally preload a model, then serve the API with uvicorn.

    Args:
        host: Interface to bind. The default ``"0.0.0.0"`` listens on all
            interfaces.
        port: TCP port to listen on.
        model: Model to load before serving. A name ending in ``.gguf`` is
            loaded with ``OVGGUFManager``; any other name is loaded with
            ``OVModelManager`` and has its OV hooks attached. When falsy, no
            model is preloaded.
    """
    global model_instance

    if model:
        if model.endswith(".gguf"):
            loaded = OVGGUFManager(model)
        else:
            loaded = OVModelManager(model)
            loaded.attach_ov_hooks()
        # Publish the manager only once it is fully set up, so a failure
        # while attaching hooks never leaves an un-hooked model in the global.
        model_instance = loaded

    uvicorn.run(app, host=host, port=port)
|
|