File size: 7,655 Bytes
0f07ba7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// Package system provides system detection utilities, including GPU/vendor detection
// and capability classification used to select optimal backends at runtime.
package system

import (
	"os"
	"path/filepath"
	"runtime"
	"strings"

	"github.com/mudler/xlog"
)

const (
	// Public constants - used by tests and external packages
	// GPU vendor identifiers. Within this file they are compared against
	// SystemState.GPUVendor and double as capability-name prefixes.
	Nvidia = "nvidia"
	AMD    = "amd"
	Intel  = "intel"

	// Private constants - only used within this package
	// Capability names returned by getSystemCapabilities.
	// defaultCapability is the catch-all used when no more specific capability applies.
	defaultCapability = "default"
	nvidiaL4T         = "nvidia-l4t"
	darwinX86         = "darwin-x86"
	metal             = "metal"
	vulkan            = "vulkan"

	// Capability names that additionally encode the CUDA major version found
	// on disk (see cuda12DirExists / cuda13DirExists).
	nvidiaCuda13    = "nvidia-cuda-13"
	nvidiaCuda12    = "nvidia-cuda-12"
	nvidiaL4TCuda12 = "nvidia-l4t-cuda-12"
	nvidiaL4TCuda13 = "nvidia-l4t-cuda-13"

	// capabilityEnv names the environment variable whose non-empty value is
	// returned verbatim as the capability, bypassing detection.
	// capabilityRunFileEnv names the environment variable that overrides the
	// path of the capability run file; defaultRunFile is used when it is unset.
	capabilityEnv        = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
	capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
	defaultRunFile       = "/run/localai/capability"

	// Backend detection tokens (private)
	// Lower-case substrings searched for in a backend's name and URI to work
	// out which platform the backend targets.
	backendTokenDarwin = "darwin"
	backendTokenMLX    = "mlx"
	backendTokenMetal  = "metal"
	backendTokenL4T    = "l4t"
	backendTokenCUDA   = "cuda"
	backendTokenROCM   = "rocm"
	backendTokenHIP    = "hip"
	backendTokenSYCL   = "sycl"
)

var (
	// cuda13DirExists records whether the CUDA 13 toolkit directory was
	// present when the package was initialized.
	cuda13DirExists bool
	// cuda12DirExists records whether the CUDA 12 toolkit directory was
	// present when the package was initialized.
	cuda12DirExists bool
)

// cudaToolkitDir returns the absolute path of the conventional install
// directory for the given CUDA major version, e.g. "/usr/local/cuda-13".
func cudaToolkitDir(majorVersion string) string {
	// The leading separator matters: without it filepath.Join yields the
	// relative path "usr/local/cuda-N", which is resolved against the process
	// working directory and only matches when that happens to be "/".
	return filepath.Join(string(filepath.Separator), "usr", "local", "cuda-"+majorVersion)
}

// init probes the filesystem once for the CUDA 13 and CUDA 12 toolkit
// directories and caches the results for capability detection. Any Stat
// error (missing path, permission problem, ...) is treated as "not present".
func init() {
	_, err := os.Stat(cudaToolkitDir("13"))
	cuda13DirExists = err == nil
	_, err = os.Stat(cudaToolkitDir("12"))
	cuda12DirExists = err == nil
}

// Capability returns the detected system capability when it appears as a key
// in capMap, and the catch-all default capability otherwise. Only the keys of
// capMap are consulted; its values are not read.
func (s *SystemState) Capability(capMap map[string]string) string {
	detected := s.getSystemCapabilities()

	_, supported := capMap[detected]
	if !supported {
		// Nothing in the map targets this system: fall back to the catch-all entry.
		xlog.Debug("The requested capability was not found, using default capability", "reportedCapability", detected, "capMap", capMap)
		return defaultCapability
	}

	xlog.Debug("Using reported capability", "reportedCapability", detected, "capMap", capMap)
	return detected
}

// getSystemCapabilities determines the capability string for the current
// system. Sources are consulted in order and the first match wins:
//
//  1. The capabilityEnv environment variable, returned verbatim when non-empty.
//  2. The capability run file (defaultRunFile, or the path given by the
//     capabilityRunFileEnv environment variable); its whitespace-trimmed
//     content is returned whenever the file can be read.
//  3. darwin/arm64 -> metal, darwin/amd64 -> darwin-x86.
//  4. linux/arm64 with an NVIDIA GPU -> the nvidia-l4t family, refined by the
//     CUDA toolkit directory found at init.
//  5. A CUDA 13 or CUDA 12 toolkit directory found at init -> nvidia-cuda-13 /
//     nvidia-cuda-12. This step does not consult GPUVendor or VRAM.
//  6. No GPU vendor, or VRAM of 4 GiB or less -> default; otherwise the GPU
//     vendor string itself.
func (s *SystemState) getSystemCapabilities() string {
	if forced := os.Getenv(capabilityEnv); forced != "" {
		xlog.Info("Using forced capability from environment variable", "capability", forced, "env", capabilityEnv)
		return forced
	}

	runFile := defaultRunFile
	if override := os.Getenv(capabilityRunFileEnv); override != "" {
		runFile = override
	}

	// Check if /run/localai/capability (or its override) exists and use it.
	// This might be used by e.g. container images to specify which
	// backends to pull in automatically when installing meta backends.
	// Reading directly (rather than Stat followed by ReadFile) avoids a
	// redundant syscall and the window between the two calls.
	contents, err := os.ReadFile(runFile)
	switch {
	case err == nil:
		forced := strings.TrimSpace(string(contents))
		xlog.Info("Using forced capability run file", "capabilityRunFile", runFile, "capability", forced, "env", capabilityRunFileEnv)
		return forced
	case !os.IsNotExist(err):
		// A missing file is the normal case and stays silent; anything else
		// (permissions, path is a directory, ...) is worth surfacing.
		xlog.Warn("Unable to read capability run file, falling back to auto-detection", "capabilityRunFile", runFile, "error", err)
	}

	// If we are on mac and arm64, we will return metal
	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
		xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv)
		return metal
	}

	// If we are on mac and x86, we will return darwin-x86
	if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
		xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv)
		return darwinX86
	}

	// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t,
	// refined by the newest CUDA toolkit directory that was found.
	if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" && s.GPUVendor == Nvidia {
		xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
		switch {
		case cuda13DirExists:
			return nvidiaL4TCuda13
		case cuda12DirExists:
			return nvidiaL4TCuda12
		default:
			return nvidiaL4T
		}
	}

	// An installed CUDA toolkit selects the matching CUDA capability; the
	// newer version takes precedence when both are present.
	if cuda13DirExists {
		return nvidiaCuda13
	}

	if cuda12DirExists {
		return nvidiaCuda12
	}

	if s.GPUVendor == "" {
		xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv)
		return defaultCapability
	}

	xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv)
	// If vram is 4GB or less, let's default to CPU but warn the user that they can override that via env
	if s.VRAM <= 4*1024*1024*1024 {
		xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv)
		return defaultCapability
	}

	return s.GPUVendor
}

// BackendPreferenceTokens reports, most preferred first, the substrings that
// identify suitable backend implementations for the detected capability.
// Callers holding several concrete backends behind one alias (e.g.
// "llama-cpp") can pick the first candidate matching the earliest token.
// The capability is lower-cased before matching, and "cpu" is always the
// final fallback token.
func (s *SystemState) BackendPreferenceTokens() []string {
	detected := strings.ToLower(s.getSystemCapabilities())
	startsWith := func(prefix string) bool {
		return strings.HasPrefix(detected, prefix)
	}

	// The order of these checks is significant: the first matching prefix wins.
	if startsWith(Nvidia) {
		return []string{backendTokenCUDA, vulkan, "cpu"}
	}
	if startsWith(AMD) {
		return []string{backendTokenROCM, backendTokenHIP, vulkan, "cpu"}
	}
	if startsWith(Intel) {
		return []string{backendTokenSYCL, Intel, "cpu"}
	}
	if startsWith(metal) {
		return []string{backendTokenMetal, "cpu"}
	}
	if startsWith(darwinX86) {
		return []string{"darwin-x86", "cpu"}
	}
	if startsWith(vulkan) {
		return []string{vulkan, "cpu"}
	}
	return []string{"cpu"}
}

// DetectedCapability exposes the capability string computed by
// getSystemCapabilities, including any environment-variable or run-file
// override, so that callers such as the UI can display it.
func (s *SystemState) DetectedCapability() string {
	detected := s.getSystemCapabilities()
	return detected
}

// IsBackendCompatible checks if a backend (identified by name and URI) is compatible
// with the current system capability. It relies on getSystemCapabilities so the
// answer is consistent with capability detection (including VRAM checks,
// environment overrides, etc.).
//
// The backend's target platform is inferred by searching the lower-cased
// "name uri" string for known tokens. The capability is lower-cased as well,
// matching BackendPreferenceTokens, so a forced capability such as "NVIDIA"
// is treated the same way by both methods. Backends that mention none of the
// known tokens are considered CPU backends and are always compatible.
func (s *SystemState) IsBackendCompatible(name, uri string) bool {
	combined := strings.ToLower(name + " " + uri)
	capability := strings.ToLower(s.getSystemCapabilities())

	// mentions reports whether the backend name/URI contains any of the tokens.
	mentions := func(tokens ...string) bool {
		for _, token := range tokens {
			if strings.Contains(combined, token) {
				return true
			}
		}
		return false
	}

	// Case order matters: the first family whose token appears decides the result.
	switch {
	case mentions(backendTokenDarwin, backendTokenMLX, backendTokenMetal):
		// Darwin backends require the system to be running on darwin with
		// metal or darwin-x86 capability.
		return capability == metal || capability == darwinX86
	case mentions(backendTokenL4T):
		// NVIDIA L4T backends (arm64 Linux with NVIDIA GPU). This must be
		// checked before the general NVIDIA case because L4T backends may
		// also contain "cuda" or "nvidia" in their names.
		return strings.HasPrefix(capability, nvidiaL4T)
	case mentions(backendTokenCUDA, Nvidia):
		// NVIDIA backends are compatible with nvidia, nvidia-cuda-12,
		// nvidia-cuda-13, and l4t capabilities.
		return strings.HasPrefix(capability, Nvidia)
	case mentions(backendTokenROCM, backendTokenHIP, AMD):
		return capability == AMD
	case mentions(backendTokenSYCL, Intel):
		return capability == Intel
	default:
		// CPU backends are always compatible
		return true
	}
}