baseten-admin committed on
Commit
4dfd91a
·
verified ·
1 Parent(s): 8b31893

manifest Qwen/Qwen3-0.6B @ B200 (47aa25be9c0bf00c)

Browse files
Qwen__Qwen3-0.6B/B200/tp1-seq16384-lora64x16/47aa25be9c0bf00c/manifest.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "Qwen/Qwen3-0.6B",
3
+ "gpu_type": "B200",
4
+ "tensor_parallel_size": 1,
5
+ "max_seq_length": 16384,
6
+ "enable_lora": true,
7
+ "max_lora_rank": 64,
8
+ "max_loras": 16,
9
+ "cudagraph_capture_sizes": [
10
+ 1,
11
+ 2,
12
+ 4,
13
+ 8,
14
+ 16,
15
+ 32,
16
+ 64,
17
+ 128,
18
+ 192,
19
+ 256,
20
+ 384,
21
+ 512,
22
+ 640,
23
+ 768,
24
+ 896,
25
+ 1000
26
+ ],
27
+ "use_mega_aot_artifact": true,
28
+ "deep_gemm_warmup": "skip",
29
+ "enable_prefix_caching": true,
30
+ "vllm_version": "0.22.0",
31
+ "torch_version": "2.11.0+cu129",
32
+ "torch": "2.11.0+cu129",
33
+ "torch_cuda": "12.9",
34
+ "vllm": "0.22.0",
35
+ "image_tag": "baseten/baseten-weight-sync-inference:main-15e6be27",
36
+ "caller": "github-actions:William-Gao1",
37
+ "model_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
38
+ "build_id": "27571412154-1",
39
+ "opaque_sampler_payload": {
40
+ "tensor_parallel_size": 1,
41
+ "max_seq_length": 16384,
42
+ "enable_lora": true,
43
+ "max_lora_rank": 64,
44
+ "max_loras": 16,
45
+ "cudagraph_capture_sizes": [
46
+ 1,
47
+ 2,
48
+ 4,
49
+ 8,
50
+ 16,
51
+ 32,
52
+ 64,
53
+ 128,
54
+ 192,
55
+ 256,
56
+ 384,
57
+ 512,
58
+ 640,
59
+ 768,
60
+ 896,
61
+ 1000
62
+ ],
63
+ "use_mega_aot_artifact": true,
64
+ "deep_gemm_warmup": "skip",
65
+ "enable_prefix_caching": true,
66
+ "load_format": "fastsafetensors"
67
+ },
68
+ "build_profile": {
69
+ "cudagraph_capture_sizes": [
70
+ 1,
71
+ 2,
72
+ 4,
73
+ 8,
74
+ 16,
75
+ 32,
76
+ 64,
77
+ 128,
78
+ 192,
79
+ 256,
80
+ 384,
81
+ 512,
82
+ 640,
83
+ 768,
84
+ 896,
85
+ 1000
86
+ ],
87
+ "use_mega_aot_artifact": true,
88
+ "deep_gemm_warmup": "skip"
89
+ },
90
+ "ready_in_seconds_no_cache": 94.03,
91
+ "kv_cache_max_tokens": 1495856,
92
+ "kv_cache_max_concurrency": 91.2998046875,
93
+ "kv_cache_gpu_memory_utilization": 0.92,
94
+ "cache_size_uncompressed_bytes": 68997120,
95
+ "cache_size_compressed_bytes": 3386170,
96
+ "compression": "zstd -9 -T0 (multithreaded)",
97
+ "compress_time_seconds": 0.21,
98
+ "upload_time_seconds": 4.04
99
+ }