# NOTE(review): removed Hugging Face web-UI scrape residue (uploader name,
# "raw / history / blame / contribute / delete" links, file-size footer)
# that was captured into the source and made the file invalid Python.
"""
Hugging Face Space server for Nanbeige/Nanbeige4.1-3B.
This file uses the shared runtime with:
- async queue buffering
- worker pool + semaphore concurrency
- safe per-request generation thread lifecycle
"""
# Import the shared runtime helpers. The relative import works when this
# module is loaded as part of a package; when the file is executed directly
# as a script, the relative form raises ImportError and we fall back to a
# plain top-level import of the sibling module.
try:
    from .server_runtime import RuntimeConfig, create_hf_space_app
except ImportError: # pragma: no cover - direct script execution
    from server_runtime import RuntimeConfig, create_hf_space_app
# Hugging Face Hub identifier of the model served by this Space.
MODEL_NAME = "Nanbeige/Nanbeige4.1-3B"

# Runtime configuration for the shared Space server: context-window limit,
# end-of-sequence token, and default sampling parameters for this model.
_runtime_config = RuntimeConfig(
    model_name=MODEL_NAME,
    title="Nanbeige4.1-3B Inference API",
    description="Streaming chat completion API for Nanbeige4.1-3B",
    max_input_tokens=32768,
    eos_token_id=166101,
    default_temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.0,
    tokenizer_use_fast=False,
    logger_name=__name__,
)

# ASGI application object picked up by the Space's server process.
app = create_hf_space_app(_runtime_config)
if __name__ == "__main__":
    # Local/direct execution entry point: serve the ASGI app with uvicorn
    # on the port Hugging Face Spaces expects (7860), bound to all interfaces.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)