#!/usr/bin/env python3
"""
OmniCoreX Deployment Server CLI Script
Deploys OmniCoreX as a REST API server for real-time inference,
supporting asynchronous input processing and JSON responses.
Requirements:
- fastapi
- uvicorn
Usage:
python scripts/deploy_server.py --config config.yaml --checkpoint ./checkpoints/checkpoint.pt --host 0.0.0.0 --port 8000
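
Example request (a sketch; assumes the server is running on localhost:8000):
    curl -X POST http://localhost:8000/infer \
        -H "Content-Type: application/json" \
        -d '{"text": "Hello, OmniCoreX"}'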
"""
import argparse
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional
from model import OmniCoreXModel
from utils import load_config_file, setup_logging
app = FastAPI()

# Globals populated in main() before the server starts serving requests.
model = None
tokenizer = None
device = None


class InferenceRequest(BaseModel):
    text: str
    # Extend with multimodal fields as needed, e.g., images encoded as base64.


class InferenceResponse(BaseModel):
    response: str
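

# A minimal sketch of how a multimodal request model could look, assuming
# images arrive base64-encoded; the class and field names below are
# illustrative, not part of the current API:
#
# class MultimodalInferenceRequest(BaseModel):
#     text: str
#     image_b64: Optional[str] = None  # base64-encoded image bytes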
@app.post("/infer", response_model=InferenceResponse)
async def infer(request: InferenceRequest):
global model, tokenizer, device
if model is None:
raise HTTPException(status_code=503, detail="Model not loaded")
try:
input_ids = tokenizer.encode(request.text) if tokenizer else [ord(c) for c in request.text]
input_tensor = torch.tensor([input_ids], device=device)
with torch.no_grad():
outputs = model(input_tensor)
# Convert outputs to string, simplistic decoding:
response_tokens = torch.argmax(outputs, dim=-1)[0].tolist()
response_text = tokenizer.decode(response_tokens) if tokenizer else "".join(chr(t % 256) for t in response_tokens)
return InferenceResponse(response=response_text)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
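
# Note: model(input_tensor) above runs synchronously inside an async handler,
# so it blocks the event loop for the duration of inference. One common
# mitigation (a sketch, not wired in here) is to offload the call to a
# thread pool:
#
#     import asyncio
#     loop = asyncio.get_running_loop()
#     outputs = await loop.run_in_executor(None, lambda: model(input_tensor))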


def parse_args():
    parser = argparse.ArgumentParser(description="Deploy OmniCoreX Model Serving API")
    parser.add_argument("--config", type=str, default="config.yaml", help="Path to config file")
    parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="Server host")
    parser.add_argument("--port", type=int, default=8000, help="Server port")
    return parser.parse_args()


def main():
    global model, tokenizer, device
    args = parse_args()
    logger = setup_logging()
    config = load_config_file(args.config)
    model_cfg = config["model"]
    arch_cfg = model_cfg.get("architecture", {})
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = OmniCoreXModel(
        stream_configs=model_cfg["streams"],
        embed_dim=arch_cfg.get("embed_dim", 768),
        num_layers=arch_cfg.get("num_layers", 24),
        num_heads=arch_cfg.get("num_heads", 12),
        dropout=arch_cfg.get("dropout", 0.1),
    )
    # The checkpoint is expected to store model weights under "model_state_dict".
    model.load_state_dict(torch.load(args.checkpoint, map_location=device)["model_state_dict"])
    model.to(device)
    model.eval()
    # Load the tokenizer if the optional tokenizer module is available.
    try:
        from tokenizer import BPETokenizer

        # Vocabulary and merges paths come from the config or a default location.
        tokenizer = BPETokenizer()
        # tokenizer.load(<path_to_vocab>, <path_to_merges>)
    except ImportError:
        tokenizer = None
        logger.warning("Tokenizer module not found; falling back to byte-level encoding.")
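
    # If the config carries tokenizer paths, they could be wired in here; the
    # keys below are hypothetical and not confirmed by the current config schema:
    #
    #     tok_cfg = config.get("tokenizer", {})
    #     tokenizer.load(tok_cfg["vocab_path"], tok_cfg["merges_path"])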

    uvicorn.run(app, host=args.host, port=args.port)


if __name__ == "__main__":
    main()