|
|
package model |
|
|
|
|
|
import ( |
|
|
"context" |
|
|
"fmt" |
|
|
"maps" |
|
|
"os" |
|
|
"path/filepath" |
|
|
"strings" |
|
|
"sync" |
|
|
"time" |
|
|
|
|
|
"github.com/mudler/LocalAI/pkg/system" |
|
|
"github.com/mudler/LocalAI/pkg/utils" |
|
|
|
|
|
"github.com/mudler/xlog" |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
type ModelLoader struct { |
|
|
ModelPath string |
|
|
mu sync.Mutex |
|
|
models map[string]*Model |
|
|
loading map[string]chan struct{} |
|
|
wd *WatchDog |
|
|
externalBackends map[string]string |
|
|
lruEvictionMaxRetries int |
|
|
lruEvictionRetryInterval time.Duration |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func NewModelLoader(system *system.SystemState) *ModelLoader { |
|
|
nml := &ModelLoader{ |
|
|
ModelPath: system.Model.ModelsPath, |
|
|
models: make(map[string]*Model), |
|
|
loading: make(map[string]chan struct{}), |
|
|
externalBackends: make(map[string]string), |
|
|
lruEvictionMaxRetries: 30, |
|
|
lruEvictionRetryInterval: 1 * time.Second, |
|
|
} |
|
|
|
|
|
return nml |
|
|
} |
|
|
|
|
|
|
|
|
func (ml *ModelLoader) GetLoadingCount() int { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
return len(ml.loading) |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) { |
|
|
ml.wd = wd |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) GetWatchDog() *WatchDog { |
|
|
return ml.wd |
|
|
} |
|
|
|
|
|
|
|
|
func (ml *ModelLoader) SetLRUEvictionRetrySettings(maxRetries int, retryInterval time.Duration) { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
ml.lruEvictionMaxRetries = maxRetries |
|
|
ml.lruEvictionRetryInterval = retryInterval |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) ExistsInModelPath(s string) bool { |
|
|
return utils.ExistsInPath(ml.ModelPath, s) |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) SetExternalBackend(name, uri string) { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
ml.externalBackends[name] = uri |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) DeleteExternalBackend(name string) { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
delete(ml.externalBackends, name) |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) GetExternalBackend(name string) string { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
return ml.externalBackends[name] |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string { |
|
|
backends := make(map[string]string) |
|
|
maps.Copy(backends, ml.externalBackends) |
|
|
if o != nil { |
|
|
maps.Copy(backends, o.externalBackends) |
|
|
} |
|
|
return backends |
|
|
} |
|
|
|
|
|
var knownFilesToSkip []string = []string{ |
|
|
"MODEL_CARD", |
|
|
"README", |
|
|
"README.md", |
|
|
} |
|
|
|
|
|
var knownModelsNameSuffixToSkip []string = []string{ |
|
|
".tmpl", |
|
|
".keep", |
|
|
".yaml", |
|
|
".yml", |
|
|
".json", |
|
|
".txt", |
|
|
".pt", |
|
|
".onnx", |
|
|
".md", |
|
|
".MD", |
|
|
".DS_Store", |
|
|
".", |
|
|
".safetensors", |
|
|
".bin", |
|
|
".partial", |
|
|
".tar.gz", |
|
|
} |
|
|
|
|
|
const retryTimeout = time.Duration(2 * time.Minute) |
|
|
|
|
|
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) { |
|
|
files, err := os.ReadDir(ml.ModelPath) |
|
|
if err != nil { |
|
|
return []string{}, err |
|
|
} |
|
|
|
|
|
models := []string{} |
|
|
FILE: |
|
|
for _, file := range files { |
|
|
|
|
|
for _, skip := range knownFilesToSkip { |
|
|
if strings.EqualFold(file.Name(), skip) { |
|
|
continue FILE |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
for _, skip := range knownModelsNameSuffixToSkip { |
|
|
if strings.HasSuffix(file.Name(), skip) { |
|
|
continue FILE |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if file.IsDir() { |
|
|
continue |
|
|
} |
|
|
|
|
|
models = append(models, file.Name()) |
|
|
} |
|
|
|
|
|
return models, nil |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) ListLoadedModels() []*Model { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
|
|
|
models := []*Model{} |
|
|
for _, model := range ml.models { |
|
|
models = append(models, model) |
|
|
} |
|
|
|
|
|
return models |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string, string, string) (*Model, error)) (*Model, error) { |
|
|
ml.mu.Lock() |
|
|
|
|
|
|
|
|
if model := ml.checkIsLoaded(modelID); model != nil { |
|
|
ml.mu.Unlock() |
|
|
return model, nil |
|
|
} |
|
|
|
|
|
|
|
|
if loadingChan, isLoading := ml.loading[modelID]; isLoading { |
|
|
ml.mu.Unlock() |
|
|
|
|
|
xlog.Debug("Waiting for model to be loaded by another request", "modelID", modelID) |
|
|
<-loadingChan |
|
|
|
|
|
ml.mu.Lock() |
|
|
model := ml.checkIsLoaded(modelID) |
|
|
ml.mu.Unlock() |
|
|
if model != nil { |
|
|
return model, nil |
|
|
} |
|
|
|
|
|
return ml.LoadModel(modelID, modelName, loader) |
|
|
} |
|
|
|
|
|
|
|
|
loadingChan := make(chan struct{}) |
|
|
ml.loading[modelID] = loadingChan |
|
|
ml.mu.Unlock() |
|
|
|
|
|
|
|
|
defer func() { |
|
|
ml.mu.Lock() |
|
|
delete(ml.loading, modelID) |
|
|
close(loadingChan) |
|
|
ml.mu.Unlock() |
|
|
}() |
|
|
|
|
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName) |
|
|
xlog.Debug("Loading model in memory from file", "file", modelFile) |
|
|
|
|
|
model, err := loader(modelID, modelName, modelFile) |
|
|
if err != nil { |
|
|
return nil, fmt.Errorf("failed to load model with internal loader: %s", err) |
|
|
} |
|
|
|
|
|
if model == nil { |
|
|
return nil, fmt.Errorf("loader didn't return a model") |
|
|
} |
|
|
|
|
|
|
|
|
ml.mu.Lock() |
|
|
ml.models[modelID] = model |
|
|
ml.mu.Unlock() |
|
|
|
|
|
return model, nil |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) ShutdownModel(modelName string) error { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
|
|
|
return ml.deleteProcess(modelName) |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) CheckIsLoaded(s string) *Model { |
|
|
ml.mu.Lock() |
|
|
defer ml.mu.Unlock() |
|
|
return ml.checkIsLoaded(s) |
|
|
} |
|
|
|
|
|
func (ml *ModelLoader) checkIsLoaded(s string) *Model { |
|
|
m, ok := ml.models[s] |
|
|
if !ok { |
|
|
return nil |
|
|
} |
|
|
|
|
|
xlog.Debug("Model already loaded in memory", "model", s) |
|
|
client := m.GRPC(false, ml.wd) |
|
|
|
|
|
xlog.Debug("Checking model availability", "model", s) |
|
|
cTimeout, cancel := context.WithTimeout(context.Background(), 2*time.Minute) |
|
|
defer cancel() |
|
|
|
|
|
alive, err := client.HealthCheck(cTimeout) |
|
|
if !alive { |
|
|
xlog.Warn("GRPC Model not responding", "error", err) |
|
|
xlog.Warn("Deleting the process in order to recreate it") |
|
|
process := m.Process() |
|
|
if process == nil { |
|
|
xlog.Error("Process not found and the model is not responding anymore", "model", s) |
|
|
return m |
|
|
} |
|
|
if !process.IsAlive() { |
|
|
xlog.Debug("GRPC Process is not responding", "model", s) |
|
|
|
|
|
err := ml.deleteProcess(s) |
|
|
if err != nil { |
|
|
xlog.Error("error stopping process", "error", err, "process", s) |
|
|
} |
|
|
return nil |
|
|
} |
|
|
} |
|
|
|
|
|
return m |
|
|
} |
|
|
|