File size: 970 Bytes
90937b1
6e9c061
90937b1
6e9c061
 
 
 
 
1728e7f
6e9c061
 
 
 
3db39aa
1728e7f
 
90937b1
6e9c061
 
 
 
 
 
 
 
 
 
 
 
90937b1
38951bc
 
90937b1
 
 
6e9c061
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
Hugging Face Space server for Nanbeige/Nanbeige4.1-3B.

This file uses the shared runtime with:
- async queue buffering
- worker pool + semaphore concurrency
- safe per-request generation thread lifecycle
"""

try:
    from .server_runtime import RuntimeConfig, create_hf_space_app
except ImportError:  # pragma: no cover - direct script execution
    from server_runtime import RuntimeConfig, create_hf_space_app


# Model identifier; forwarded to the shared runtime as ``model_name``.
MODEL_NAME = "Nanbeige/Nanbeige4.1-3B"

# All per-model settings live in one RuntimeConfig so the shared runtime
# (server_runtime) can stay model-agnostic.
_RUNTIME_CONFIG = RuntimeConfig(
    # Identification and API metadata.
    model_name=MODEL_NAME,
    title="Nanbeige4.1-3B Inference API",
    description="Streaming chat completion API for Nanbeige4.1-3B",
    logger_name=__name__,
    # Tokenizer / prompt settings.
    # NOTE(review): eos_token_id is model-specific — confirm against the
    # model's tokenizer config if the checkpoint is ever changed.
    tokenizer_use_fast=False,
    max_input_tokens=32768,
    eos_token_id=166101,
    # Generation parameters handed to the runtime.
    default_temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.0,
)

# Application object built by the shared runtime; the ``__main__`` guard at
# the bottom of this file hands it to uvicorn.
app = create_hf_space_app(_RUNTIME_CONFIG)


if __name__ == "__main__":
    # Direct script execution: start the server ourselves. The import is kept
    # local so merely importing this module does not require uvicorn.
    from uvicorn import run as _serve

    _serve(
        app,
        host="0.0.0.0",
        port=7860,
    )