File size: 2,692 Bytes
a4d9876
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Build backend declaration: build front ends install `requires` and then
# call the `build-backend` module to produce the distributions.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Core package metadata.
[project]
name = "omnivoice"
version = "0.1.5"
description = "OmniVoice: Towards Omnilingual Zero-Shot Text-to-Speech with Diffusion Language Models"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10"
authors = [{ name = "Han Zhu" }]
keywords = [
    "tts",
    "text-to-speech",
    "speech-synthesis",
    "zero-shot",
    "multilingual",
    "diffusion",
    "voice-cloning",
]
classifiers = [
    # Audience
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    # Topics
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Multimedia :: Sound/Audio :: Speech",
    # Platform and language
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
]
# Runtime requirements (PEP 508 strings).
# torch and torchaudio must stay listed here: the `[tool.uv.sources]`
# overrides in this file rely on them being direct dependencies.
dependencies = [
    "torch>=2.4",
    "torchaudio>=2.4",
    "transformers>=5.3.0",
    "accelerate",
    "pydub",
    "gradio",
    "tensorboardX",
    "webdataset",
    "numpy",
    "soundfile",
    "librosa",
    "uvicorn>=0.42.0",
    "fastapi>=0.135.2",
]

# Optional extras. `eval` is installed with `pip install "omnivoice[eval]"`.
[project.optional-dependencies]
eval = [
    "jiwer==3.1.0",  # WER
    "s3prl",         # Speech representation (HuBERT etc.)
    "funasr",        # ASR models
    "zhconv",        # Chinese character normalization
    "zhon",          # Chinese punctuation
    "unidecode",     # Unicode normalization
]

# Console commands created at install time. Each value is an entry point
# reference of the form `module.path:callable`.
[project.scripts]
omnivoice-infer = "omnivoice.cli.infer:main"
omnivoice-infer-batch = "omnivoice.cli.infer_batch:main"
omnivoice-demo = "omnivoice.cli.demo:main"

# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/k2-fsa/OmniVoice"
Repository = "https://github.com/k2-fsa/OmniVoice"
"Bug Tracker" = "https://github.com/k2-fsa/OmniVoice/issues"

# uv-specific resolution settings.
[tool.uv]
# Hold torch and torchaudio at the same release. Constraints only narrow the
# versions the resolver may pick; the packages themselves are requested in
# `[project].dependencies`.
constraint-dependencies = [
    "torch==2.8.0",
    "torchaudio==2.8.0",
]

[tool.uv.sources]
# On Linux and Windows, fetch torch and torchaudio from the CUDA wheel index
# declared under `[[tool.uv.index]]` (CUDA builds do not exist for macOS).
# NOTE: Both packages must also be listed in `[project].dependencies`. The
# CUDA builds are not selected when they are only pulled in transitively.
torch = [
    { index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]
torchaudio = [
    { index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]

[[tool.uv.index]]
name = "pytorch-cuda"
# PyTorch wheels built against CUDA Toolkit 12.8.
# Available versions: https://pytorch.org/get-started/locally/
url = "https://download.pytorch.org/whl/cu128"
# Consult this index only for packages that name it in `tool.uv.sources`.
explicit = true

# Hatch build targets: the sdist includes the `omnivoice` path and the wheel
# ships `omnivoice` as its package.
[tool.hatch.build.targets.sdist]
include = ["omnivoice"]

[tool.hatch.build.targets.wheel]
packages = ["omnivoice"]