| package xsysinfo |
|
|
| import ( |
| gguf "github.com/gpustack/gguf-parser-go" |
| ) |
|
|
// VRAMEstimate is the result of estimating how much of a GGUF model can be
// offloaded to a GPU with a given amount of video memory.
//
// All memory figures use whatever unit the underlying estimator and the
// caller-supplied VRAM figure share (presumably bytes; confirm against callers).
type VRAMEstimate struct {
	// TotalVRAM is the VRAM figure the estimate was computed against.
	TotalVRAM uint64
	// AvailableVRAM is the VRAM assumed to be usable for the model.
	AvailableVRAM uint64
	// ModelSize is the model size as reported by the GGUF metadata.
	ModelSize uint64
	// EstimatedLayers is the number of layers expected to fit in AvailableVRAM.
	EstimatedLayers int
	// EstimatedVRAM is the VRAM the model is expected to need when every
	// offloadable layer is placed on the GPU.
	EstimatedVRAM uint64
	// IsFullOffload reports whether the whole model is expected to fit.
	IsFullOffload bool
}
|
|
| func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) { |
| |
| m := f.Metadata() |
|
|
| estimate := f.EstimateLLaMACppRun() |
|
|
| lmes := estimate.SummarizeItem(true, 0, 0) |
| estimatedVRAM := uint64(0) |
| availableLayers := lmes.OffloadLayers |
|
|
| for _, vram := range lmes.VRAMs { |
| estimatedVRAM += uint64(vram.NonUMA) |
| } |
|
|
| |
| modelSize := uint64(m.Size) |
|
|
| if availableLayers == 0 { |
| availableLayers = 1 |
| } |
|
|
| if estimatedVRAM == 0 { |
| estimatedVRAM = 1 |
| } |
|
|
| |
| |
| layerSize := estimatedVRAM / availableLayers |
|
|
| estimatedLayers := int(availableVRAM / layerSize) |
| if availableVRAM > estimatedVRAM { |
| estimatedLayers = int(availableLayers) |
| } |
|
|
| |
|
|
| return &VRAMEstimate{ |
| TotalVRAM: availableVRAM, |
| AvailableVRAM: availableVRAM, |
| ModelSize: modelSize, |
| EstimatedLayers: estimatedLayers, |
| EstimatedVRAM: estimatedVRAM, |
| IsFullOffload: availableVRAM > estimatedVRAM, |
| }, nil |
| } |
|
|