Spaces:
Running
Running
File size: 4,117 Bytes
750bbe6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
package cli
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/mudler/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/format"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/xlog"
)
// TranscriptCMD holds the command-line arguments and flags of the audio
// transcription command. Each field's struct tags carry its flag name,
// default value, environment variables and help text; Run consumes the parsed
// values.
type TranscriptCMD struct {
	Filename         string                                 `arg:"" name:"file" help:"Audio file to transcribe" type:"path"`
	Backend          string                                 `short:"b" default:"whisper" help:"Backend to run the transcription model"`
	Model            string                                 `short:"m" required:"" help:"Model name to run the transcription"`
	Language         string                                 `short:"l" help:"Language of the audio file"`
	Translate        bool                                   `short:"c" help:"Translate the transcription to English"`
	Diarize          bool                                   `short:"d" help:"Mark speaker turns"`
	Threads          int                                    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
	BackendsPath     string                                 `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"storage"`
	ModelsPath       string                                 `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
	BackendGalleries string                                 `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
	Prompt           string                                 `short:"p" help:"Previous transcribed text or words that hint at what the model should expect"`
	ResponseFormat   schema.TranscriptionResponseFormatType `short:"f" default:"" help:"Response format for Whisper models, can be one of (txt, lrc, srt, vtt, json, json_verbose)"`
	PrettyPrint      bool                                   `help:"Used with response_format json or json_verbose for pretty printing"`
}
// Run transcribes the audio file t.Filename with the model t.Model and prints
// the result to standard output.
//
// It builds the system state from BackendsPath and ModelsPath, loads the model
// configurations found under ModelsPath, overrides the thread count of the
// selected model with t.Threads and passes the request to
// backend.ModelTranscription. ResponseFormat selects the output layout; any
// value not handled explicitly (including the empty default) prints one
// "<start> - <text>" line per segment.
//
// An error is returned when the system state cannot be created, the model
// configurations cannot be loaded, the model is unknown, the transcription
// fails, or the result cannot be encoded as JSON. Returned errors wrap the
// underlying cause, so errors.Is / errors.As keep working on them.
func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
	systemState, err := system.GetSystemState(
		system.WithBackendPath(t.BackendsPath),
		system.WithModelPath(t.ModelsPath),
	)
	if err != nil {
		return fmt.Errorf("initializing system state: %w", err)
	}

	opts := &config.ApplicationConfig{
		SystemState: systemState,
		Context:     context.Background(),
	}

	cl := config.NewModelConfigLoader(t.ModelsPath)
	ml := model.NewModelLoader(systemState)

	// Backend registration is best-effort: a failure is logged and the
	// command carries on.
	if err := gallery.RegisterBackends(systemState, ml); err != nil {
		xlog.Error("error registering external backends", "error", err)
	}

	if err := cl.LoadModelConfigsFromPath(t.ModelsPath); err != nil {
		return fmt.Errorf("loading model configs from %q: %w", t.ModelsPath, err)
	}

	c, exists := cl.GetModelConfig(t.Model)
	if !exists {
		return errors.New("model not found: " + t.Model)
	}

	// The command-line thread count takes precedence over the model config.
	c.Threads = &t.Threads

	// Ask the model loader to stop its gRPC processes on the way out; a
	// failure here is only logged so it cannot mask the command's own result.
	defer func() {
		if err := ml.StopAllGRPC(); err != nil {
			xlog.Error("unable to stop all grpc processes", "error", err)
		}
	}()

	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, t.Diarize, t.Prompt, ml, c, opts)
	if err != nil {
		return fmt.Errorf("transcribing %q: %w", t.Filename, err)
	}

	switch t.ResponseFormat {
	case schema.TranscriptionResponseFormatLrc, schema.TranscriptionResponseFormatSrt, schema.TranscriptionResponseFormatVtt, schema.TranscriptionResponseFormatText:
		fmt.Println(format.TranscriptionResponse(tr, t.ResponseFormat))
	case schema.TranscriptionResponseFormatJson:
		// Plain JSON omits the per-segment detail; json_verbose keeps it.
		tr.Segments = nil
		fallthrough
	case schema.TranscriptionResponseFormatJsonVerbose:
		var encoded []byte
		if t.PrettyPrint {
			encoded, err = json.MarshalIndent(tr, "", " ")
		} else {
			encoded, err = json.Marshal(tr)
		}
		if err != nil {
			return fmt.Errorf("encoding transcription as JSON: %w", err)
		}
		fmt.Println(string(encoded))
	default:
		for _, segment := range tr.Segments {
			fmt.Println(segment.Start.String(), "-", strings.TrimSpace(segment.Text))
		}
	}
	return nil
}
|