|
|
"""Example FastAPI server for llama.cpp. |
|
|
|
|
|
To run this example: |
|
|
|
|
|
```bash |
|
|
pip install fastapi uvicorn sse-starlette pydantic-settings |
|
|
export MODEL=../models/7B/... |
|
|
``` |
|
|
|
|
|
Then run: |
|
|
``` |
|
|
uvicorn llama_cpp.server.app:create_app --reload |
|
|
``` |
|
|
|
|
|
or |
|
|
|
|
|
``` |
|
|
python3 -m llama_cpp.server |
|
|
``` |
|
|
|
|
|
Then visit http://localhost:8000/docs to see the interactive API docs. |
|
|
|
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import argparse |
|
|
|
|
|
import uvicorn |
|
|
|
|
|
from llama_cpp.server.app import create_app |
|
|
from llama_cpp.server.settings import ( |
|
|
Settings, |
|
|
ServerSettings, |
|
|
ModelSettings, |
|
|
ConfigFileSettings, |
|
|
) |
|
|
from llama_cpp.server.cli import add_args_from_model, parse_model_from_args |
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: build the settings and serve the app with uvicorn.

    Settings come from one of two sources:

    * a config file, whose path is read from the ``CONFIG_FILE`` environment
      variable and falls back to the ``--config_file`` argument. Paths ending
      in ``.yaml`` / ``.yml`` are parsed as YAML; anything else is handed to
      the JSON validator as-is.
    * otherwise, the command-line arguments generated from ``Settings``.

    Any exception raised while the settings are being loaded is printed to
    stderr, followed by the usage help, and the process exits with status 1.

    When starting uvicorn, the ``HOST`` and ``PORT`` environment variables
    take precedence over the host/port found in the server settings.
    """
    parser = argparse.ArgumentParser(
        description="🦙 Llama.cpp python server. Host your own LLMs!🚀"
    )
    add_args_from_model(parser, Settings)
    parser.add_argument(
        "--config_file",
        type=str,
        help="Path to a config file to load.",
    )

    server_settings: ServerSettings | None = None
    model_settings: list[ModelSettings] = []
    args = parser.parse_args()

    try:
        # The environment variable wins over the command-line flag.
        config_file = os.environ.get("CONFIG_FILE", args.config_file)
        if not config_file:
            # No config file: everything comes from the parsed CLI arguments.
            server_settings = parse_model_from_args(ServerSettings, args)
            model_settings = [parse_model_from_args(ModelSettings, args)]
        else:
            if not os.path.exists(config_file):
                raise ValueError(f"Config file {config_file} not found!")
            with open(config_file, "rb") as stream:
                if config_file.endswith((".yaml", ".yml")):
                    # Imported lazily so they are only needed for YAML configs.
                    import yaml
                    import json

                    # Round-trip YAML through JSON so both formats share the
                    # same validation entry point.
                    payload = json.dumps(yaml.safe_load(stream))
                else:
                    payload = stream.read()
                loaded = ConfigFileSettings.model_validate_json(payload)
                server_settings = ServerSettings.model_validate(loaded)
                model_settings = loaded.models
    except Exception as exc:
        # CLI boundary: report the problem, show usage, and exit non-zero.
        print(exc, file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    assert server_settings is not None
    assert model_settings is not None

    app = create_app(
        server_settings=server_settings,
        model_settings=model_settings,
    )

    # HOST / PORT from the environment override the configured values.
    host = os.getenv("HOST", server_settings.host)
    port = int(os.getenv("PORT", server_settings.port))
    uvicorn.run(
        app,
        host=host,
        port=port,
        ssl_keyfile=server_settings.ssl_keyfile,
        ssl_certfile=server_settings.ssl_certfile,
    )
|
|
|
|
|
|
|
|
# Start the server only when this module is executed as a script
# (e.g. ``python3 -m llama_cpp.server``), not when it is imported.
if __name__ == "__main__":
    main()
|
|
|