| from sanic import Sanic, text |
| from sanic.response import json |
|
|
| import sglang as sgl |
|
|
# Module-level engine handle; stays None until run_server() assigns it.
engine = None

# Sanic application object that the route decorators in this module attach to.
app = Sanic("sanic-server")
|
|
|
|
| |
@app.route("/generate", methods=["POST"])
async def generate(request):
    """Generate a completion for the posted prompt and return it as plain text.

    The request body is expected to be a JSON object with a non-empty
    ``"prompt"`` entry. When the prompt is missing or empty -- including when
    the body is empty or is JSON that is not an object -- the handler answers
    with HTTP 400 and a JSON error payload.
    """
    # request.json may be None (e.g. empty body) or a non-dict JSON value
    # such as a list; treat both as "no prompt" rather than letting .get()
    # raise AttributeError and turn a client mistake into a 500.
    payload = request.json
    prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not prompt:
        return json({"error": "Prompt is required"}, status=400)

    # Non-streaming call: the awaited result is indexed by "text" and only
    # that entry is sent back to the client.
    result = await engine.async_generate(prompt)
    return text(result["text"])
|
|
|
|
@app.route("/generate_stream", methods=["POST"])
async def generate_stream(request):
    """Stream a completion for the posted prompt back to the client.

    The request body is expected to be a JSON object with a non-empty
    ``"prompt"`` entry. When the prompt is missing or empty -- including when
    the body is empty or is JSON that is not an object -- the handler answers
    with HTTP 400 and a JSON error payload and no stream is opened.
    """
    # request.json may be None (e.g. empty body) or a non-dict JSON value
    # such as a list; treat both as "no prompt" rather than letting .get()
    # raise AttributeError and turn a client mistake into a 500.
    payload = request.json
    prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not prompt:
        return json({"error": "Prompt is required"}, status=400)

    # With stream=True the awaited call gives back an async iterable of
    # chunks instead of a single result.
    result = await engine.async_generate(prompt, stream=True)

    # Open the streaming response only after validation has passed.
    response = await request.respond()

    # NOTE(review): each chunk's "text" is forwarded as-is. If the engine
    # emits cumulative text rather than deltas, clients will see repeated
    # prefixes -- confirm against the SGLang streaming documentation.
    async for chunk in result:
        await response.send(chunk["text"])

    await response.eof()
|
|
|
|
def run_server(
    model_path: str = "meta-llama/Meta-Llama-3.1-8B-Instruct",
    host: str = "0.0.0.0",
    port: int = 8000,
) -> None:
    """Create the SGLang engine and run the Sanic app.

    Args:
        model_path: Model path handed to ``sgl.Engine``.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.

    Calling ``run_server()`` with no arguments uses the model, host and port
    this server was originally hard-coded to.
    """
    # The route handlers read the module-level ``engine``, so it is assigned
    # here before the app is started.
    global engine
    engine = sgl.Engine(model_path=model_path)
    app.run(host=host, port=port, single_process=True)
|
|
|
|
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    run_server()
|
|