"""
Hugging Face Space server for Nanbeige/Nanbeige4.1-3B.

This file uses the shared runtime with:
- async queue buffering
- worker pool + semaphore concurrency
- safe per-request generation thread lifecycle
"""
# Support both ways of loading this module: as part of a package (relative
# import) and as a directly executed script, where there is no parent package
# and the relative import raises ImportError.
try:
    from .server_runtime import RuntimeConfig, create_hf_space_app
except ImportError:  # pragma: no cover - direct script execution
    from server_runtime import RuntimeConfig, create_hf_space_app
# Model served by this Space; handed to the runtime as `model_name`.
MODEL_NAME = "Nanbeige/Nanbeige4.1-3B"

# Per-model settings for the shared runtime. How each field is interpreted is
# defined by RuntimeConfig in server_runtime, which is not shown in this file.
_runtime_config = RuntimeConfig(
    # Identity / metadata.
    model_name=MODEL_NAME,
    title="Nanbeige4.1-3B Inference API",
    description="Streaming chat completion API for Nanbeige4.1-3B",
    # Input limit and tokenizer options.
    max_input_tokens=32768,
    eos_token_id=166101,
    tokenizer_use_fast=False,
    # Sampling settings.
    default_temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.0,
    # Name the runtime logger after this module.
    logger_name=__name__,
)

# Application object built by the shared runtime from the settings above.
app = create_hf_space_app(_runtime_config)
if __name__ == "__main__":
    # Direct script execution: serve `app` with uvicorn, listening on all
    # interfaces at port 7860. uvicorn is imported here so that merely
    # importing this module does not import it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)