NanoBotAIAgent committed on
Commit
8a42673
·
verified ·
1 Parent(s): 803b895

Entrypoint with reasoning on for Gemma-4-E2B

Browse files
Files changed (1) hide show
  1. entrypoint.sh +52 -0
entrypoint.sh ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Container entrypoint.
#
# Starts llama-server in the background, waits until its /health endpoint
# reports success, then replaces this shell with the uvicorn proxy.
# Exits non-zero (without starting the proxy) if llama-server dies during
# startup or does not become healthy within READY_TIMEOUT_SECS seconds.
set -euo pipefail

MODEL_PATH="/data/model/Gemma-4-E2B-Uncensored-HauhauCS-Aggressive-Q8_K_P.gguf"

# Address llama-server binds to; the readiness probe below uses the same
# values so the two cannot drift apart.
LLAMA_HOST="127.0.0.1"
LLAMA_PORT=8080

# Maximum number of one-second polls to wait for llama-server to be healthy.
READY_TIMEOUT_SECS=600

# Detect CPU cores for max threading
NPROC=$(nproc)
echo "Using $NPROC threads"

# Context window = 131072 tokens.
# --n-predict 25000 sets the default/maximum generated tokens per request.
#
# Reasoning ("thinking") is ENABLED BY DEFAULT:
#   --jinja               use the model's embedded Gemma-4 chat template
#   --reasoning on        sets enable_thinking=true in the template kwargs,
#                         which injects <|think|> and lets the model generate
#                         <|channel>thought...reasoning...<channel|> blocks
#   --reasoning-format deepseek
#                         extracts the <|channel>thought...<channel|> block
#                         into a separate `reasoning_content` field (same as
#                         DeepSeek/Qwen3 API format)
#   --reasoning-budget -1 unrestricted thinking length
#
# NOTE: Gemma-4 uses <|channel>thought / <channel|> delimiters (NOT <think>).
# llama.cpp auto-detects the Gemma-4 template and uses the correct PEG parser.
/app/llama-server \
  --model "$MODEL_PATH" \
  --port "$LLAMA_PORT" \
  --host "$LLAMA_HOST" \
  --ctx-size 131072 \
  --n-predict 25000 \
  --parallel 1 \
  --threads "$NPROC" \
  --threads-batch "$NPROC" \
  --batch-size 512 \
  --jinja \
  --reasoning on \
  --reasoning-format deepseek \
  --reasoning-budget -1 &

LLAMA_PID=$!

echo "Waiting for llama-server to start..."
ready=0
for ((i = 1; i <= READY_TIMEOUT_SECS; i++)); do
  # `set -e` does not cover background jobs, so check explicitly whether the
  # server is still alive instead of polling a dead process until timeout.
  if ! kill -0 "$LLAMA_PID" 2>/dev/null; then
    rv=0
    wait "$LLAMA_PID" || rv=$?
    echo "llama-server exited during startup (status $rv)" >&2
    exit 1
  fi

  # -f makes curl fail on HTTP errors: /health answers 503 while the model is
  # still loading, which plain `curl -s` would report as success.
  # --max-time keeps a hung connection from stalling the loop.
  if curl -sf --max-time 5 "http://${LLAMA_HOST}:${LLAMA_PORT}/health" > /dev/null 2>&1; then
    echo "llama-server is ready!"
    ready=1
    break
  fi
  sleep 1
done

# Do not start the proxy in front of a backend that never became healthy.
if (( ready == 0 )); then
  echo "llama-server not ready after ${READY_TIMEOUT_SECS} attempts; aborting" >&2
  kill "$LLAMA_PID" 2>/dev/null || true
  exit 1
fi

exec uvicorn proxy:app --host 0.0.0.0 --port 8000 --proxy-headers