Spaces:
Configuration error
Configuration error
| package backend | |
| import ( | |
| "math/rand" | |
| "os" | |
| "path/filepath" | |
| "github.com/mudler/LocalAI/core/config" | |
| pb "github.com/mudler/LocalAI/pkg/grpc/proto" | |
| "github.com/mudler/LocalAI/pkg/model" | |
| "github.com/rs/zerolog/log" | |
| ) | |
| func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { | |
| name := c.Name | |
| if name == "" { | |
| name = c.Model | |
| } | |
| defOpts := []model.Option{ | |
| model.WithBackendString(c.Backend), | |
| model.WithModel(c.Model), | |
| model.WithAssetDir(so.AssetsDestination), | |
| model.WithContext(so.Context), | |
| model.WithModelID(name), | |
| } | |
| threads := 1 | |
| if c.Threads != nil { | |
| threads = *c.Threads | |
| } | |
| if so.Threads != 0 { | |
| threads = so.Threads | |
| } | |
| c.Threads = &threads | |
| grpcOpts := grpcModelOpts(c) | |
| defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts)) | |
| if so.SingleBackend { | |
| defOpts = append(defOpts, model.WithSingleActiveBackend()) | |
| } | |
| if so.ParallelBackendRequests { | |
| defOpts = append(defOpts, model.EnableParallelRequests) | |
| } | |
| if c.GRPC.Attempts != 0 { | |
| defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts)) | |
| } | |
| if c.GRPC.AttemptsSleepTime != 0 { | |
| defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) | |
| } | |
| for k, v := range so.ExternalGRPCBackends { | |
| defOpts = append(defOpts, model.WithExternalBackend(k, v)) | |
| } | |
| return append(defOpts, opts...) | |
| } | |
| func getSeed(c config.BackendConfig) int32 { | |
| var seed int32 = config.RAND_SEED | |
| if c.Seed != nil { | |
| seed = int32(*c.Seed) | |
| } | |
| if seed == config.RAND_SEED { | |
| seed = rand.Int31() | |
| } | |
| return seed | |
| } | |
| func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { | |
| b := 512 | |
| if c.Batch != 0 { | |
| b = c.Batch | |
| } | |
| f16 := false | |
| if c.F16 != nil { | |
| f16 = *c.F16 | |
| } | |
| embeddings := false | |
| if c.Embeddings != nil { | |
| embeddings = *c.Embeddings | |
| } | |
| lowVRAM := false | |
| if c.LowVRAM != nil { | |
| lowVRAM = *c.LowVRAM | |
| } | |
| mmap := false | |
| if c.MMap != nil { | |
| mmap = *c.MMap | |
| } | |
| ctxSize := 1024 | |
| if c.ContextSize != nil { | |
| ctxSize = *c.ContextSize | |
| } | |
| mmlock := false | |
| if c.MMlock != nil { | |
| mmlock = *c.MMlock | |
| } | |
| nGPULayers := 9999999 | |
| if c.NGPULayers != nil { | |
| nGPULayers = *c.NGPULayers | |
| } | |
| return &pb.ModelOptions{ | |
| CUDA: c.CUDA || c.Diffusers.CUDA, | |
| SchedulerType: c.Diffusers.SchedulerType, | |
| PipelineType: c.Diffusers.PipelineType, | |
| CFGScale: c.Diffusers.CFGScale, | |
| LoraAdapter: c.LoraAdapter, | |
| LoraScale: c.LoraScale, | |
| LoraAdapters: c.LoraAdapters, | |
| LoraScales: c.LoraScales, | |
| F16Memory: f16, | |
| LoraBase: c.LoraBase, | |
| IMG2IMG: c.Diffusers.IMG2IMG, | |
| CLIPModel: c.Diffusers.ClipModel, | |
| CLIPSubfolder: c.Diffusers.ClipSubFolder, | |
| CLIPSkip: int32(c.Diffusers.ClipSkip), | |
| ControlNet: c.Diffusers.ControlNet, | |
| ContextSize: int32(ctxSize), | |
| Seed: getSeed(c), | |
| NBatch: int32(b), | |
| NoMulMatQ: c.NoMulMatQ, | |
| DraftModel: c.DraftModel, | |
| AudioPath: c.VallE.AudioPath, | |
| Quantization: c.Quantization, | |
| LoadFormat: c.LoadFormat, | |
| GPUMemoryUtilization: c.GPUMemoryUtilization, | |
| TrustRemoteCode: c.TrustRemoteCode, | |
| EnforceEager: c.EnforceEager, | |
| SwapSpace: int32(c.SwapSpace), | |
| MaxModelLen: int32(c.MaxModelLen), | |
| TensorParallelSize: int32(c.TensorParallelSize), | |
| MMProj: c.MMProj, | |
| FlashAttention: c.FlashAttention, | |
| NoKVOffload: c.NoKVOffloading, | |
| YarnExtFactor: c.YarnExtFactor, | |
| YarnAttnFactor: c.YarnAttnFactor, | |
| YarnBetaFast: c.YarnBetaFast, | |
| YarnBetaSlow: c.YarnBetaSlow, | |
| NGQA: c.NGQA, | |
| RMSNormEps: c.RMSNormEps, | |
| MLock: mmlock, | |
| RopeFreqBase: c.RopeFreqBase, | |
| RopeScaling: c.RopeScaling, | |
| Type: c.ModelType, | |
| RopeFreqScale: c.RopeFreqScale, | |
| NUMA: c.NUMA, | |
| Embeddings: embeddings, | |
| LowVRAM: lowVRAM, | |
| NGPULayers: int32(nGPULayers), | |
| MMap: mmap, | |
| MainGPU: c.MainGPU, | |
| Threads: int32(*c.Threads), | |
| TensorSplit: c.TensorSplit, | |
| // AutoGPTQ | |
| ModelBaseName: c.AutoGPTQ.ModelBaseName, | |
| Device: c.AutoGPTQ.Device, | |
| UseTriton: c.AutoGPTQ.Triton, | |
| UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer, | |
| // RWKV | |
| Tokenizer: c.Tokenizer, | |
| } | |
| } | |
| func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions { | |
| promptCachePath := "" | |
| if c.PromptCachePath != "" { | |
| p := filepath.Join(modelPath, c.PromptCachePath) | |
| err := os.MkdirAll(filepath.Dir(p), 0750) | |
| if err == nil { | |
| promptCachePath = p | |
| } else { | |
| log.Error().Err(err).Str("promptCachePath", promptCachePath).Msg("error creating prompt cache folder") | |
| } | |
| } | |
| return &pb.PredictOptions{ | |
| Temperature: float32(*c.Temperature), | |
| TopP: float32(*c.TopP), | |
| NDraft: c.NDraft, | |
| TopK: int32(*c.TopK), | |
| Tokens: int32(*c.Maxtokens), | |
| Threads: int32(*c.Threads), | |
| PromptCacheAll: c.PromptCacheAll, | |
| PromptCacheRO: c.PromptCacheRO, | |
| PromptCachePath: promptCachePath, | |
| F16KV: *c.F16, | |
| DebugMode: *c.Debug, | |
| Grammar: c.Grammar, | |
| NegativePromptScale: c.NegativePromptScale, | |
| RopeFreqBase: c.RopeFreqBase, | |
| RopeFreqScale: c.RopeFreqScale, | |
| NegativePrompt: c.NegativePrompt, | |
| Mirostat: int32(*c.LLMConfig.Mirostat), | |
| MirostatETA: float32(*c.LLMConfig.MirostatETA), | |
| MirostatTAU: float32(*c.LLMConfig.MirostatTAU), | |
| Debug: *c.Debug, | |
| StopPrompts: c.StopWords, | |
| Repeat: int32(c.RepeatLastN), | |
| FrequencyPenalty: float32(c.FrequencyPenalty), | |
| PresencePenalty: float32(c.PresencePenalty), | |
| Penalty: float32(c.RepeatPenalty), | |
| NKeep: int32(c.Keep), | |
| Batch: int32(c.Batch), | |
| IgnoreEOS: c.IgnoreEOS, | |
| Seed: getSeed(c), | |
| MLock: *c.MMlock, | |
| MMap: *c.MMap, | |
| MainGPU: c.MainGPU, | |
| TensorSplit: c.TensorSplit, | |
| TailFreeSamplingZ: float32(*c.TFZ), | |
| TypicalP: float32(*c.TypicalP), | |
| } | |
| } | |