# Inference-API / main / config.yaml
# Commit 2a73022 — "changed to uvicorn setup for HF v16" (AurelioAguirre)
# Settings for this service's own HTTP listener (uvicorn).
server:
  host: "0.0.0.0"     # bind on all interfaces (required inside the HF container)
  port: 8002
  timeout: 60         # request timeout in seconds — TODO confirm unit against consumer
  max_batch_size: 1   # requests processed per batch
# Upstream LLM backend this service proxies to.
llm_server:
  host: "teamgenki-llmserver.hf.space"
  port: 7860          # Will be ignored for hf.space URLs
  timeout: 60.0       # upstream request timeout in seconds — TODO confirm unit against consumer
  api_prefix: "/api/v1"  # This will be used for route prefixing
  # Route paths on the upstream server, joined after api_prefix.
  endpoints:
    generate: "/generate"
    generate_stream: "/generate/stream"
    embedding: "/embedding"
    system_status: "/system/status"
    system_validate: "/system/validate"
    model_initialize: "/model/initialize"
    model_initialize_embedding: "/model/initialize/embedding"
    model_download: "/model/download"