"""
Deployment module for KerdosAI.
"""

import logging
from typing import Any, Optional

import docker
import torch
import uvicorn
import yaml
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

logger = logging.getLogger(__name__)
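
# Example usage (a minimal sketch; assumes a Hugging Face-style model and
# tokenizer pair, e.g. one loaded with the transformers library):
#
#     deployer = Deployer(model, tokenizer)
#     deployer.deploy(deployment_type="rest", host="0.0.0.0", port=8000)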


class TextRequest(BaseModel):
    """Request model for text generation."""

    text: str
    max_length: Optional[int] = 100
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
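
# Example request body for POST /generate (illustrative values):
#     {"text": "Once upon a time", "max_length": 50,
#      "temperature": 0.7, "top_p": 0.9}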


class Deployer:
    """
    Handles model deployment in various environments.
    """

    def __init__(
        self,
        model: Any,
        tokenizer: Any,
        device: Optional[str] = None
    ):
        """
        Initialize the deployer.

        Args:
            model: The trained model
            tokenizer: The model's tokenizer
            device: Device to run inference on (defaults to CUDA when available)
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        # Move the model to the target device and switch to inference mode.
        self.model.to(self.device)
        self.model.eval()

    def deploy(
        self,
        deployment_type: str = "rest",
        host: str = "0.0.0.0",
        port: int = 8000,
        **kwargs
    ) -> None:
        """
        Deploy the model.

        Args:
            deployment_type: Type of deployment ("rest", "docker", or "kubernetes")
            host: Host address for the REST API
            port: Port number for the REST API
            **kwargs: Additional deployment parameters
        """
        if deployment_type == "rest":
            self._deploy_rest(host, port)
        elif deployment_type == "docker":
            self._deploy_docker(**kwargs)
        elif deployment_type == "kubernetes":
            self._deploy_kubernetes(**kwargs)
        else:
            raise ValueError(f"Unsupported deployment type: {deployment_type}")

    def _deploy_rest(self, host: str, port: int) -> None:
        """
        Deploy the model as a REST API.

        Args:
            host: Host address
            port: Port number
        """
        app = FastAPI(title="KerdosAI API")

        @app.post("/generate")
        async def generate_text(request: TextRequest):
            try:
                # Tokenize the prompt and move the tensors to the model's device.
                inputs = self.tokenizer(
                    request.text,
                    return_tensors="pt",
                    padding=True,
                    truncation=True
                ).to(self.device)

                with torch.no_grad():
                    outputs = self.model.generate(
                        **inputs,
                        max_length=request.max_length,
                        do_sample=True,  # needed for temperature/top_p to take effect
                        temperature=request.temperature,
                        top_p=request.top_p,
                        pad_token_id=self.tokenizer.eos_token_id
                    )

                generated_text = self.tokenizer.decode(
                    outputs[0],
                    skip_special_tokens=True
                )

                return {"generated_text": generated_text}

            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        uvicorn.run(app, host=host, port=port)
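
        # Example client call once the server is running (hypothetical prompt):
        #     curl -X POST http://localhost:8000/generate \
        #          -H "Content-Type: application/json" \
        #          -d '{"text": "Once upon a time", "max_length": 50}'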

    def _deploy_docker(self, **kwargs) -> None:
        """
        Deploy the model using Docker.

        Args:
            **kwargs: Additional Docker deployment parameters
        """
        # Minimal image that serves the model through the REST deployer.
        # (The CMD assumes kerdosai.deployer exposes a matching CLI entry point.)
        dockerfile_content = """
FROM python:3.8-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

CMD ["python", "-m", "kerdosai.deployer", "--deploy", "rest"]
"""

        with open("Dockerfile", "w") as f:
            f.write(dockerfile_content)

        client = docker.from_env()

        try:
            # Build the image from the current directory, then start a detached
            # container mapping the API port to the host.
            image, _ = client.images.build(
                path=".",
                tag="kerdosai:latest",
                dockerfile="Dockerfile"
            )

            container = client.containers.run(
                image.id,
                ports={"8000/tcp": 8000},
                detach=True
            )

            logger.info(f"Docker container started: {container.id}")

        except Exception as e:
            logger.error(f"Error deploying with Docker: {str(e)}")
            raise
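
        # Manual equivalent of the build/run above, for reference:
        #     docker build -t kerdosai:latest .
        #     docker run -d -p 8000:8000 kerdosai:latest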

    def _deploy_kubernetes(self, **kwargs) -> None:
        """
        Deploy the model using Kubernetes.

        Args:
            **kwargs: Additional Kubernetes deployment parameters
        """
        # Deployment manifest running a single replica of the kerdosai image.
        deployment_manifest = {
            "apiVersion": "apps/v1",
            "kind": "Deployment",
            "metadata": {
                "name": "kerdosai"
            },
            "spec": {
                "replicas": 1,
                "selector": {
                    "matchLabels": {
                        "app": "kerdosai"
                    }
                },
                "template": {
                    "metadata": {
                        "labels": {
                            "app": "kerdosai"
                        }
                    },
                    "spec": {
                        "containers": [{
                            "name": "kerdosai",
                            "image": "kerdosai:latest",
                            "ports": [{
                                "containerPort": 8000
                            }]
                        }]
                    }
                }
            }
        }

        # Service manifest exposing the deployment on port 80 via a LoadBalancer.
        service_manifest = {
            "apiVersion": "v1",
            "kind": "Service",
            "metadata": {
                "name": "kerdosai"
            },
            "spec": {
                "selector": {
                    "app": "kerdosai"
                },
                "ports": [{
                    "port": 80,
                    "targetPort": 8000
                }],
                "type": "LoadBalancer"
            }
        }

        with open("deployment.yaml", "w") as f:
            yaml.dump(deployment_manifest, f)

        with open("service.yaml", "w") as f:
            yaml.dump(service_manifest, f)

        logger.info("Kubernetes manifests created. Apply them using:")
        logger.info("kubectl apply -f deployment.yaml")
        logger.info("kubectl apply -f service.yaml")