|
|
|
|
|
|
|
|
package system |
|
|
|
|
|
import ( |
|
|
"os" |
|
|
"path/filepath" |
|
|
"runtime" |
|
|
"strings" |
|
|
|
|
|
"github.com/mudler/xlog" |
|
|
) |
|
|
|
|
|
// GPU vendor identifiers. Besides being compared against SystemState.GPUVendor,
// they are used as capability name prefixes and as substrings searched for in
// backend names and URIs.
const (
	Nvidia = "nvidia"
	AMD    = "amd"
	Intel  = "intel"
)

// Capability names produced by the detection logic that are not plain vendor
// names.
const (
	defaultCapability = "default"
	nvidiaL4T         = "nvidia-l4t"
	darwinX86         = "darwin-x86"
	metal             = "metal"
	vulkan            = "vulkan"
)

// CUDA-versioned capability names, selected according to which CUDA toolkit
// directory was found at package initialization.
const (
	nvidiaCuda13    = "nvidia-cuda-13"
	nvidiaCuda12    = "nvidia-cuda-12"
	nvidiaL4TCuda12 = "nvidia-l4t-cuda-12"
	nvidiaL4TCuda13 = "nvidia-l4t-cuda-13"
)

// Overrides for capability detection.
const (
	// capabilityEnv names the environment variable whose non-empty value is
	// returned as the capability without any detection.
	capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
	// capabilityRunFileEnv names the environment variable that replaces the
	// path of the capability run file.
	capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
	// defaultRunFile is the capability run file consulted when
	// capabilityRunFileEnv is unset or empty.
	defaultRunFile = "/run/localai/capability"
)

// Substrings looked for in a lower-cased backend name/URI to work out which
// platform a backend targets; some are also returned as preference tokens.
const (
	backendTokenDarwin = "darwin"
	backendTokenMLX    = "mlx"
	backendTokenMetal  = "metal"
	backendTokenL4T    = "l4t"
	backendTokenCUDA   = "cuda"
	backendTokenROCM   = "rocm"
	backendTokenHIP    = "hip"
	backendTokenSYCL   = "sycl"
)
|
|
|
|
|
// cuda13DirExists and cuda12DirExists record whether the CUDA 13 / CUDA 12
// toolkit paths under /usr/local could be stat'ed when the package was
// initialized. They are computed once in init and only read afterwards.
var (
	cuda13DirExists bool
	cuda12DirExists bool
)

// init probes the CUDA toolkit locations once at package load.
func init() {
	cuda13DirExists = cudaToolkitExists("cuda-13")
	cuda12DirExists = cudaToolkitExists("cuda-12")
}

// cudaToolkitPath returns the absolute path of the toolkit directory called
// name under /usr/local. The leading "/" matters: joining "usr", "local", ...
// without it yields a path relative to the process working directory.
func cudaToolkitPath(name string) string {
	return filepath.Join("/", "usr", "local", name)
}

// cudaToolkitExists reports whether the toolkit path for name can be stat'ed.
// Any stat error (missing path, permission problem, ...) counts as "absent".
func cudaToolkitExists(name string) bool {
	_, err := os.Stat(cudaToolkitPath(name))
	return err == nil
}
|
|
|
|
|
func (s *SystemState) Capability(capMap map[string]string) string { |
|
|
reportedCapability := s.getSystemCapabilities() |
|
|
|
|
|
|
|
|
if _, exists := capMap[reportedCapability]; exists { |
|
|
xlog.Debug("Using reported capability", "reportedCapability", reportedCapability, "capMap", capMap) |
|
|
return reportedCapability |
|
|
} |
|
|
|
|
|
xlog.Debug("The requested capability was not found, using default capability", "reportedCapability", reportedCapability, "capMap", capMap) |
|
|
|
|
|
return defaultCapability |
|
|
} |
|
|
|
|
|
func (s *SystemState) getSystemCapabilities() string { |
|
|
capability := os.Getenv(capabilityEnv) |
|
|
if capability != "" { |
|
|
xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv) |
|
|
return capability |
|
|
} |
|
|
|
|
|
capabilityRunFile := defaultRunFile |
|
|
capabilityRunFileEnv := os.Getenv(capabilityRunFileEnv) |
|
|
if capabilityRunFileEnv != "" { |
|
|
capabilityRunFile = capabilityRunFileEnv |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if _, err := os.Stat(capabilityRunFile); err == nil { |
|
|
capability, err := os.ReadFile(capabilityRunFile) |
|
|
if err == nil { |
|
|
xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv) |
|
|
return strings.Trim(strings.TrimSpace(string(capability)), "\n") |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { |
|
|
xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv) |
|
|
return metal |
|
|
} |
|
|
|
|
|
|
|
|
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" { |
|
|
xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv) |
|
|
return darwinX86 |
|
|
} |
|
|
|
|
|
|
|
|
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" { |
|
|
if s.GPUVendor == Nvidia { |
|
|
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv) |
|
|
if cuda13DirExists { |
|
|
return nvidiaL4TCuda13 |
|
|
} |
|
|
if cuda12DirExists { |
|
|
return nvidiaL4TCuda12 |
|
|
} |
|
|
return nvidiaL4T |
|
|
} |
|
|
} |
|
|
|
|
|
if cuda13DirExists { |
|
|
return nvidiaCuda13 |
|
|
} |
|
|
|
|
|
if cuda12DirExists { |
|
|
return nvidiaCuda12 |
|
|
} |
|
|
|
|
|
if s.GPUVendor == "" { |
|
|
xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv) |
|
|
return defaultCapability |
|
|
} |
|
|
|
|
|
xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv) |
|
|
|
|
|
if s.VRAM <= 4*1024*1024*1024 { |
|
|
xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv) |
|
|
return defaultCapability |
|
|
} |
|
|
|
|
|
return s.GPUVendor |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func (s *SystemState) BackendPreferenceTokens() []string { |
|
|
capStr := strings.ToLower(s.getSystemCapabilities()) |
|
|
switch { |
|
|
case strings.HasPrefix(capStr, Nvidia): |
|
|
return []string{backendTokenCUDA, vulkan, "cpu"} |
|
|
case strings.HasPrefix(capStr, AMD): |
|
|
return []string{backendTokenROCM, backendTokenHIP, vulkan, "cpu"} |
|
|
case strings.HasPrefix(capStr, Intel): |
|
|
return []string{backendTokenSYCL, Intel, "cpu"} |
|
|
case strings.HasPrefix(capStr, metal): |
|
|
return []string{backendTokenMetal, "cpu"} |
|
|
case strings.HasPrefix(capStr, darwinX86): |
|
|
return []string{"darwin-x86", "cpu"} |
|
|
case strings.HasPrefix(capStr, vulkan): |
|
|
return []string{vulkan, "cpu"} |
|
|
default: |
|
|
return []string{"cpu"} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (s *SystemState) DetectedCapability() string { |
|
|
return s.getSystemCapabilities() |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func (s *SystemState) IsBackendCompatible(name, uri string) bool { |
|
|
combined := strings.ToLower(name + " " + uri) |
|
|
capability := s.getSystemCapabilities() |
|
|
|
|
|
|
|
|
isDarwinBackend := strings.Contains(combined, backendTokenDarwin) || |
|
|
strings.Contains(combined, backendTokenMLX) || |
|
|
strings.Contains(combined, backendTokenMetal) |
|
|
if isDarwinBackend { |
|
|
|
|
|
return capability == metal || capability == darwinX86 |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
isL4TBackend := strings.Contains(combined, backendTokenL4T) |
|
|
if isL4TBackend { |
|
|
return strings.HasPrefix(capability, nvidiaL4T) |
|
|
} |
|
|
|
|
|
|
|
|
isNvidiaBackend := strings.Contains(combined, backendTokenCUDA) || |
|
|
strings.Contains(combined, Nvidia) |
|
|
if isNvidiaBackend { |
|
|
|
|
|
return strings.HasPrefix(capability, Nvidia) |
|
|
} |
|
|
|
|
|
|
|
|
isAMDBackend := strings.Contains(combined, backendTokenROCM) || |
|
|
strings.Contains(combined, backendTokenHIP) || |
|
|
strings.Contains(combined, AMD) |
|
|
if isAMDBackend { |
|
|
return capability == AMD |
|
|
} |
|
|
|
|
|
|
|
|
isIntelBackend := strings.Contains(combined, backendTokenSYCL) || |
|
|
strings.Contains(combined, Intel) |
|
|
if isIntelBackend { |
|
|
return capability == Intel |
|
|
} |
|
|
|
|
|
|
|
|
return true |
|
|
} |
|
|
|