File size: 3,370 Bytes
50b0b12 dacc7a8 50b0b12 5cceced 50b0b12 a61f522 50b0b12 5cceced 50b0b12 5cceced 50b0b12 5cceced |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
---
license: mit
pipeline_tag: text-to-speech
tags:
- text-to-speech
- automatic-speech-recognition
---
<div align="center" style="text-align: center;">
<img src="./assets/logo.png" alt="logo" width="300" style="display: block; margin: 0 auto;" />
</div>
<p align="center">
<a href="https://github.com/getcharzp/go-speech/fork" target="_blank">
<img src="https://img.shields.io/github/forks/getcharzp/go-speech?style=for-the-badge" alt="go-speech forks"/>
</a>
<a href="https://github.com/getcharzp/go-speech/stargazers" target="_blank">
<img src="https://img.shields.io/github/stars/getcharzp/go-speech?style=for-the-badge" alt="go-speech stars"/>
</a>
<a href="https://github.com/getcharzp/go-speech/pulls" target="_blank">
<img src="https://img.shields.io/github/issues-pr/getcharzp/go-speech?style=for-the-badge" alt="go-speech pull-requests"/>
</a>
<a href='https://github.com/getcharzp/go-speech/releases'>
<img src='https://img.shields.io/github/release/getcharzp/go-speech?label=Latest&style=for-the-badge' alt='go-speech latest release'/>
</a>
</p>
go-speech 是基于 Golang + [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases/tag/v1.23.2) 构建的轻量语音库,支持 TTS(文本转语音)与 ASR(语音转文字),集成了 MeloTTS、达摩院 Paraformer 架构模型以及 Whisper 模型。
## 安装
```shell
# 下载包
go get -u github.com/getcharzp/go-speech
# 下载模型、动态链接库
git clone https://huggingface.co/getcharzp/go-speech
```
## 快速开始
### TTS
```go
package main
import (
"github.com/getcharzp/go-speech/tts/melotts"
"github.com/up-zero/gotool/fileutil"
"log"
)
// main synthesizes a sample Chinese sentence with the MeloTTS engine and
// saves the resulting WAV data to output.wav in the working directory.
func main() {
	ttsEngine, err := melotts.NewEngine(melotts.DefaultConfig())
	if err != nil {
		// No deferred cleanup is registered yet, so exiting via log.Fatalf is fine here.
		log.Fatalf("创建引擎失败: %v", err)
	}
	defer ttsEngine.Destroy()

	// Sample input mixing a date, a large number and a phone number.
	text := "2019年12月30日,中国人口突破14亿人,联系电话: 13800138000。"

	// From here on, failures are reported with log.Printf + return instead of
	// log.Fatalf: log.Fatalf terminates through os.Exit, which does not run
	// deferred calls, so ttsEngine.Destroy would be skipped.
	// NOTE(review): the 1.0 argument is presumably the speech speed — confirm
	// against the melotts API.
	wavData, err := ttsEngine.SynthesizeToWav(text, 1.0)
	if err != nil {
		log.Printf("合成失败: %v", err)
		return
	}

	outputPath := "output.wav"
	if err := fileutil.FileSave(outputPath, wavData); err != nil {
		log.Printf("保存 WAV 失败: %v", err)
	}
}
```
<audio controls>
<source src="https://media.githubusercontent.com/media/GetcharZp/go-speech/master/assets/output.wav" type="audio/wav">
</audio>
### ASR
#### Paraformer
```go
package main
import (
"fmt"
"github.com/getcharzp/go-speech/asr/paraformer"
"log"
)
// main runs the Paraformer engine on ./zh-en.wav and prints the recognized
// text to standard output.
func main() {
	// Create the engine with the configuration returned by paraformer.DefaultConfig.
	engine, err := paraformer.NewEngine(paraformer.DefaultConfig())
	if err != nil {
		log.Fatalf("创建引擎失败: %v", err)
	}
	// Destroy is deferred so it runs on every normal return from main below.
	defer engine.Destroy()

	transcript, err := engine.TranscribeFile("./zh-en.wav")
	if err != nil {
		// Log and return (rather than log.Fatalf) so the deferred Destroy still runs.
		log.Printf("识别出错: %v", err)
		return
	}

	fmt.Printf("识别结果: %s\n", transcript)
}
```
#### Whisper
```go
package main
import (
"fmt"
"github.com/getcharzp/go-speech/asr/whisper"
"log"
)
// main runs the Whisper engine on ./zh-en.wav, requesting the transcribe task
// with the language set to whisper.LangZh, and prints the recognized text.
func main() {
	asrEngine, err := whisper.NewEngine(whisper.DefaultConfig())
	if err != nil {
		// No deferred cleanup is registered yet, so exiting via log.Fatalf is fine here.
		log.Fatalf("创建引擎失败: %v", err)
	}
	defer asrEngine.Destroy()

	text, err := asrEngine.TranscribeFile("./zh-en.wav", whisper.TranscribeOption{
		Language: whisper.LangZh,
		Task:     whisper.TaskTranscribe,
	})
	if err != nil {
		// log.Printf + return instead of log.Fatalf: log.Fatalf exits through
		// os.Exit, which would skip the deferred Destroy and make the return
		// that follows it unreachable.
		log.Printf("识别出错: %v", err)
		return
	}

	fmt.Printf("识别结果: %s\n", text) // Example output: Yesterday was星期一Today is Tuesday明天是星期三
}
``` |