Upload 12 files
Browse files- LICENSE +373 -0
- README.md +83 -0
- demo.py +121 -0
- server/Dockerfile +21 -0
- server/Dockerfile.cpu +20 -0
- server/Dockerfile.cuda121 +23 -0
- server/main.py +185 -0
- server/requirements.txt +12 -0
- server/requirements_cpu.txt +11 -0
- test/default_speaker.json +0 -0
- test/requirements.txt +2 -0
- test/test_streaming.py +127 -0
LICENSE
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Mozilla Public License Version 2.0
|
| 2 |
+
==================================
|
| 3 |
+
|
| 4 |
+
1. Definitions
|
| 5 |
+
--------------
|
| 6 |
+
|
| 7 |
+
1.1. "Contributor"
|
| 8 |
+
means each individual or legal entity that creates, contributes to
|
| 9 |
+
the creation of, or owns Covered Software.
|
| 10 |
+
|
| 11 |
+
1.2. "Contributor Version"
|
| 12 |
+
means the combination of the Contributions of others (if any) used
|
| 13 |
+
by a Contributor and that particular Contributor's Contribution.
|
| 14 |
+
|
| 15 |
+
1.3. "Contribution"
|
| 16 |
+
means Covered Software of a particular Contributor.
|
| 17 |
+
|
| 18 |
+
1.4. "Covered Software"
|
| 19 |
+
means Source Code Form to which the initial Contributor has attached
|
| 20 |
+
the notice in Exhibit A, the Executable Form of such Source Code
|
| 21 |
+
Form, and Modifications of such Source Code Form, in each case
|
| 22 |
+
including portions thereof.
|
| 23 |
+
|
| 24 |
+
1.5. "Incompatible With Secondary Licenses"
|
| 25 |
+
means
|
| 26 |
+
|
| 27 |
+
(a) that the initial Contributor has attached the notice described
|
| 28 |
+
in Exhibit B to the Covered Software; or
|
| 29 |
+
|
| 30 |
+
(b) that the Covered Software was made available under the terms of
|
| 31 |
+
version 1.1 or earlier of the License, but not also under the
|
| 32 |
+
terms of a Secondary License.
|
| 33 |
+
|
| 34 |
+
1.6. "Executable Form"
|
| 35 |
+
means any form of the work other than Source Code Form.
|
| 36 |
+
|
| 37 |
+
1.7. "Larger Work"
|
| 38 |
+
means a work that combines Covered Software with other material, in
|
| 39 |
+
a separate file or files, that is not Covered Software.
|
| 40 |
+
|
| 41 |
+
1.8. "License"
|
| 42 |
+
means this document.
|
| 43 |
+
|
| 44 |
+
1.9. "Licensable"
|
| 45 |
+
means having the right to grant, to the maximum extent possible,
|
| 46 |
+
whether at the time of the initial grant or subsequently, any and
|
| 47 |
+
all of the rights conveyed by this License.
|
| 48 |
+
|
| 49 |
+
1.10. "Modifications"
|
| 50 |
+
means any of the following:
|
| 51 |
+
|
| 52 |
+
(a) any file in Source Code Form that results from an addition to,
|
| 53 |
+
deletion from, or modification of the contents of Covered
|
| 54 |
+
Software; or
|
| 55 |
+
|
| 56 |
+
(b) any new file in Source Code Form that contains any Covered
|
| 57 |
+
Software.
|
| 58 |
+
|
| 59 |
+
1.11. "Patent Claims" of a Contributor
|
| 60 |
+
means any patent claim(s), including without limitation, method,
|
| 61 |
+
process, and apparatus claims, in any patent Licensable by such
|
| 62 |
+
Contributor that would be infringed, but for the grant of the
|
| 63 |
+
License, by the making, using, selling, offering for sale, having
|
| 64 |
+
made, import, or transfer of either its Contributions or its
|
| 65 |
+
Contributor Version.
|
| 66 |
+
|
| 67 |
+
1.12. "Secondary License"
|
| 68 |
+
means either the GNU General Public License, Version 2.0, the GNU
|
| 69 |
+
Lesser General Public License, Version 2.1, the GNU Affero General
|
| 70 |
+
Public License, Version 3.0, or any later versions of those
|
| 71 |
+
licenses.
|
| 72 |
+
|
| 73 |
+
1.13. "Source Code Form"
|
| 74 |
+
means the form of the work preferred for making modifications.
|
| 75 |
+
|
| 76 |
+
1.14. "You" (or "Your")
|
| 77 |
+
means an individual or a legal entity exercising rights under this
|
| 78 |
+
License. For legal entities, "You" includes any entity that
|
| 79 |
+
controls, is controlled by, or is under common control with You. For
|
| 80 |
+
purposes of this definition, "control" means (a) the power, direct
|
| 81 |
+
or indirect, to cause the direction or management of such entity,
|
| 82 |
+
whether by contract or otherwise, or (b) ownership of more than
|
| 83 |
+
fifty percent (50%) of the outstanding shares or beneficial
|
| 84 |
+
ownership of such entity.
|
| 85 |
+
|
| 86 |
+
2. License Grants and Conditions
|
| 87 |
+
--------------------------------
|
| 88 |
+
|
| 89 |
+
2.1. Grants
|
| 90 |
+
|
| 91 |
+
Each Contributor hereby grants You a world-wide, royalty-free,
|
| 92 |
+
non-exclusive license:
|
| 93 |
+
|
| 94 |
+
(a) under intellectual property rights (other than patent or trademark)
|
| 95 |
+
Licensable by such Contributor to use, reproduce, make available,
|
| 96 |
+
modify, display, perform, distribute, and otherwise exploit its
|
| 97 |
+
Contributions, either on an unmodified basis, with Modifications, or
|
| 98 |
+
as part of a Larger Work; and
|
| 99 |
+
|
| 100 |
+
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
| 101 |
+
for sale, have made, import, and otherwise transfer either its
|
| 102 |
+
Contributions or its Contributor Version.
|
| 103 |
+
|
| 104 |
+
2.2. Effective Date
|
| 105 |
+
|
| 106 |
+
The licenses granted in Section 2.1 with respect to any Contribution
|
| 107 |
+
become effective for each Contribution on the date the Contributor first
|
| 108 |
+
distributes such Contribution.
|
| 109 |
+
|
| 110 |
+
2.3. Limitations on Grant Scope
|
| 111 |
+
|
| 112 |
+
The licenses granted in this Section 2 are the only rights granted under
|
| 113 |
+
this License. No additional rights or licenses will be implied from the
|
| 114 |
+
distribution or licensing of Covered Software under this License.
|
| 115 |
+
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
| 116 |
+
Contributor:
|
| 117 |
+
|
| 118 |
+
(a) for any code that a Contributor has removed from Covered Software;
|
| 119 |
+
or
|
| 120 |
+
|
| 121 |
+
(b) for infringements caused by: (i) Your and any other third party's
|
| 122 |
+
modifications of Covered Software, or (ii) the combination of its
|
| 123 |
+
Contributions with other software (except as part of its Contributor
|
| 124 |
+
Version); or
|
| 125 |
+
|
| 126 |
+
(c) under Patent Claims infringed by Covered Software in the absence of
|
| 127 |
+
its Contributions.
|
| 128 |
+
|
| 129 |
+
This License does not grant any rights in the trademarks, service marks,
|
| 130 |
+
or logos of any Contributor (except as may be necessary to comply with
|
| 131 |
+
the notice requirements in Section 3.4).
|
| 132 |
+
|
| 133 |
+
2.4. Subsequent Licenses
|
| 134 |
+
|
| 135 |
+
No Contributor makes additional grants as a result of Your choice to
|
| 136 |
+
distribute the Covered Software under a subsequent version of this
|
| 137 |
+
License (see Section 10.2) or under the terms of a Secondary License (if
|
| 138 |
+
permitted under the terms of Section 3.3).
|
| 139 |
+
|
| 140 |
+
2.5. Representation
|
| 141 |
+
|
| 142 |
+
Each Contributor represents that the Contributor believes its
|
| 143 |
+
Contributions are its original creation(s) or it has sufficient rights
|
| 144 |
+
to grant the rights to its Contributions conveyed by this License.
|
| 145 |
+
|
| 146 |
+
2.6. Fair Use
|
| 147 |
+
|
| 148 |
+
This License is not intended to limit any rights You have under
|
| 149 |
+
applicable copyright doctrines of fair use, fair dealing, or other
|
| 150 |
+
equivalents.
|
| 151 |
+
|
| 152 |
+
2.7. Conditions
|
| 153 |
+
|
| 154 |
+
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
| 155 |
+
in Section 2.1.
|
| 156 |
+
|
| 157 |
+
3. Responsibilities
|
| 158 |
+
-------------------
|
| 159 |
+
|
| 160 |
+
3.1. Distribution of Source Form
|
| 161 |
+
|
| 162 |
+
All distribution of Covered Software in Source Code Form, including any
|
| 163 |
+
Modifications that You create or to which You contribute, must be under
|
| 164 |
+
the terms of this License. You must inform recipients that the Source
|
| 165 |
+
Code Form of the Covered Software is governed by the terms of this
|
| 166 |
+
License, and how they can obtain a copy of this License. You may not
|
| 167 |
+
attempt to alter or restrict the recipients' rights in the Source Code
|
| 168 |
+
Form.
|
| 169 |
+
|
| 170 |
+
3.2. Distribution of Executable Form
|
| 171 |
+
|
| 172 |
+
If You distribute Covered Software in Executable Form then:
|
| 173 |
+
|
| 174 |
+
(a) such Covered Software must also be made available in Source Code
|
| 175 |
+
Form, as described in Section 3.1, and You must inform recipients of
|
| 176 |
+
the Executable Form how they can obtain a copy of such Source Code
|
| 177 |
+
Form by reasonable means in a timely manner, at a charge no more
|
| 178 |
+
than the cost of distribution to the recipient; and
|
| 179 |
+
|
| 180 |
+
(b) You may distribute such Executable Form under the terms of this
|
| 181 |
+
License, or sublicense it under different terms, provided that the
|
| 182 |
+
license for the Executable Form does not attempt to limit or alter
|
| 183 |
+
the recipients' rights in the Source Code Form under this License.
|
| 184 |
+
|
| 185 |
+
3.3. Distribution of a Larger Work
|
| 186 |
+
|
| 187 |
+
You may create and distribute a Larger Work under terms of Your choice,
|
| 188 |
+
provided that You also comply with the requirements of this License for
|
| 189 |
+
the Covered Software. If the Larger Work is a combination of Covered
|
| 190 |
+
Software with a work governed by one or more Secondary Licenses, and the
|
| 191 |
+
Covered Software is not Incompatible With Secondary Licenses, this
|
| 192 |
+
License permits You to additionally distribute such Covered Software
|
| 193 |
+
under the terms of such Secondary License(s), so that the recipient of
|
| 194 |
+
the Larger Work may, at their option, further distribute the Covered
|
| 195 |
+
Software under the terms of either this License or such Secondary
|
| 196 |
+
License(s).
|
| 197 |
+
|
| 198 |
+
3.4. Notices
|
| 199 |
+
|
| 200 |
+
You may not remove or alter the substance of any license notices
|
| 201 |
+
(including copyright notices, patent notices, disclaimers of warranty,
|
| 202 |
+
or limitations of liability) contained within the Source Code Form of
|
| 203 |
+
the Covered Software, except that You may alter any license notices to
|
| 204 |
+
the extent required to remedy known factual inaccuracies.
|
| 205 |
+
|
| 206 |
+
3.5. Application of Additional Terms
|
| 207 |
+
|
| 208 |
+
You may choose to offer, and to charge a fee for, warranty, support,
|
| 209 |
+
indemnity or liability obligations to one or more recipients of Covered
|
| 210 |
+
Software. However, You may do so only on Your own behalf, and not on
|
| 211 |
+
behalf of any Contributor. You must make it absolutely clear that any
|
| 212 |
+
such warranty, support, indemnity, or liability obligation is offered by
|
| 213 |
+
You alone, and You hereby agree to indemnify every Contributor for any
|
| 214 |
+
liability incurred by such Contributor as a result of warranty, support,
|
| 215 |
+
indemnity or liability terms You offer. You may include additional
|
| 216 |
+
disclaimers of warranty and limitations of liability specific to any
|
| 217 |
+
jurisdiction.
|
| 218 |
+
|
| 219 |
+
4. Inability to Comply Due to Statute or Regulation
|
| 220 |
+
---------------------------------------------------
|
| 221 |
+
|
| 222 |
+
If it is impossible for You to comply with any of the terms of this
|
| 223 |
+
License with respect to some or all of the Covered Software due to
|
| 224 |
+
statute, judicial order, or regulation then You must: (a) comply with
|
| 225 |
+
the terms of this License to the maximum extent possible; and (b)
|
| 226 |
+
describe the limitations and the code they affect. Such description must
|
| 227 |
+
be placed in a text file included with all distributions of the Covered
|
| 228 |
+
Software under this License. Except to the extent prohibited by statute
|
| 229 |
+
or regulation, such description must be sufficiently detailed for a
|
| 230 |
+
recipient of ordinary skill to be able to understand it.
|
| 231 |
+
|
| 232 |
+
5. Termination
|
| 233 |
+
--------------
|
| 234 |
+
|
| 235 |
+
5.1. The rights granted under this License will terminate automatically
|
| 236 |
+
if You fail to comply with any of its terms. However, if You become
|
| 237 |
+
compliant, then the rights granted under this License from a particular
|
| 238 |
+
Contributor are reinstated (a) provisionally, unless and until such
|
| 239 |
+
Contributor explicitly and finally terminates Your grants, and (b) on an
|
| 240 |
+
ongoing basis, if such Contributor fails to notify You of the
|
| 241 |
+
non-compliance by some reasonable means prior to 60 days after You have
|
| 242 |
+
come back into compliance. Moreover, Your grants from a particular
|
| 243 |
+
Contributor are reinstated on an ongoing basis if such Contributor
|
| 244 |
+
notifies You of the non-compliance by some reasonable means, this is the
|
| 245 |
+
first time You have received notice of non-compliance with this License
|
| 246 |
+
from such Contributor, and You become compliant prior to 30 days after
|
| 247 |
+
Your receipt of the notice.
|
| 248 |
+
|
| 249 |
+
5.2. If You initiate litigation against any entity by asserting a patent
|
| 250 |
+
infringement claim (excluding declaratory judgment actions,
|
| 251 |
+
counter-claims, and cross-claims) alleging that a Contributor Version
|
| 252 |
+
directly or indirectly infringes any patent, then the rights granted to
|
| 253 |
+
You by any and all Contributors for the Covered Software under Section
|
| 254 |
+
2.1 of this License shall terminate.
|
| 255 |
+
|
| 256 |
+
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
| 257 |
+
end user license agreements (excluding distributors and resellers) which
|
| 258 |
+
have been validly granted by You or Your distributors under this License
|
| 259 |
+
prior to termination shall survive termination.
|
| 260 |
+
|
| 261 |
+
************************************************************************
|
| 262 |
+
* *
|
| 263 |
+
* 6. Disclaimer of Warranty *
|
| 264 |
+
* ------------------------- *
|
| 265 |
+
* *
|
| 266 |
+
* Covered Software is provided under this License on an "as is" *
|
| 267 |
+
* basis, without warranty of any kind, either expressed, implied, or *
|
| 268 |
+
* statutory, including, without limitation, warranties that the *
|
| 269 |
+
* Covered Software is free of defects, merchantable, fit for a *
|
| 270 |
+
* particular purpose or non-infringing. The entire risk as to the *
|
| 271 |
+
* quality and performance of the Covered Software is with You. *
|
| 272 |
+
* Should any Covered Software prove defective in any respect, You *
|
| 273 |
+
* (not any Contributor) assume the cost of any necessary servicing, *
|
| 274 |
+
* repair, or correction. This disclaimer of warranty constitutes an *
|
| 275 |
+
* essential part of this License. No use of any Covered Software is *
|
| 276 |
+
* authorized under this License except under this disclaimer. *
|
| 277 |
+
* *
|
| 278 |
+
************************************************************************
|
| 279 |
+
|
| 280 |
+
************************************************************************
|
| 281 |
+
* *
|
| 282 |
+
* 7. Limitation of Liability *
|
| 283 |
+
* -------------------------- *
|
| 284 |
+
* *
|
| 285 |
+
* Under no circumstances and under no legal theory, whether tort *
|
| 286 |
+
* (including negligence), contract, or otherwise, shall any *
|
| 287 |
+
* Contributor, or anyone who distributes Covered Software as *
|
| 288 |
+
* permitted above, be liable to You for any direct, indirect, *
|
| 289 |
+
* special, incidental, or consequential damages of any character *
|
| 290 |
+
* including, without limitation, damages for lost profits, loss of *
|
| 291 |
+
* goodwill, work stoppage, computer failure or malfunction, or any *
|
| 292 |
+
* and all other commercial damages or losses, even if such party *
|
| 293 |
+
* shall have been informed of the possibility of such damages. This *
|
| 294 |
+
* limitation of liability shall not apply to liability for death or *
|
| 295 |
+
* personal injury resulting from such party's negligence to the *
|
| 296 |
+
* extent applicable law prohibits such limitation. Some *
|
| 297 |
+
* jurisdictions do not allow the exclusion or limitation of *
|
| 298 |
+
* incidental or consequential damages, so this exclusion and *
|
| 299 |
+
* limitation may not apply to You. *
|
| 300 |
+
* *
|
| 301 |
+
************************************************************************
|
| 302 |
+
|
| 303 |
+
8. Litigation
|
| 304 |
+
-------------
|
| 305 |
+
|
| 306 |
+
Any litigation relating to this License may be brought only in the
|
| 307 |
+
courts of a jurisdiction where the defendant maintains its principal
|
| 308 |
+
place of business and such litigation shall be governed by laws of that
|
| 309 |
+
jurisdiction, without reference to its conflict-of-law provisions.
|
| 310 |
+
Nothing in this Section shall prevent a party's ability to bring
|
| 311 |
+
cross-claims or counter-claims.
|
| 312 |
+
|
| 313 |
+
9. Miscellaneous
|
| 314 |
+
----------------
|
| 315 |
+
|
| 316 |
+
This License represents the complete agreement concerning the subject
|
| 317 |
+
matter hereof. If any provision of this License is held to be
|
| 318 |
+
unenforceable, such provision shall be reformed only to the extent
|
| 319 |
+
necessary to make it enforceable. Any law or regulation which provides
|
| 320 |
+
that the language of a contract shall be construed against the drafter
|
| 321 |
+
shall not be used to construe this License against a Contributor.
|
| 322 |
+
|
| 323 |
+
10. Versions of the License
|
| 324 |
+
---------------------------
|
| 325 |
+
|
| 326 |
+
10.1. New Versions
|
| 327 |
+
|
| 328 |
+
Mozilla Foundation is the license steward. Except as provided in Section
|
| 329 |
+
10.3, no one other than the license steward has the right to modify or
|
| 330 |
+
publish new versions of this License. Each version will be given a
|
| 331 |
+
distinguishing version number.
|
| 332 |
+
|
| 333 |
+
10.2. Effect of New Versions
|
| 334 |
+
|
| 335 |
+
You may distribute the Covered Software under the terms of the version
|
| 336 |
+
of the License under which You originally received the Covered Software,
|
| 337 |
+
or under the terms of any subsequent version published by the license
|
| 338 |
+
steward.
|
| 339 |
+
|
| 340 |
+
10.3. Modified Versions
|
| 341 |
+
|
| 342 |
+
If you create software not governed by this License, and you want to
|
| 343 |
+
create a new license for such software, you may create and use a
|
| 344 |
+
modified version of this License if you rename the license and remove
|
| 345 |
+
any references to the name of the license steward (except to note that
|
| 346 |
+
such modified license differs from this License).
|
| 347 |
+
|
| 348 |
+
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
| 349 |
+
Licenses
|
| 350 |
+
|
| 351 |
+
If You choose to distribute Source Code Form that is Incompatible With
|
| 352 |
+
Secondary Licenses under the terms of this version of the License, the
|
| 353 |
+
notice described in Exhibit B of this License must be attached.
|
| 354 |
+
|
| 355 |
+
Exhibit A - Source Code Form License Notice
|
| 356 |
+
-------------------------------------------
|
| 357 |
+
|
| 358 |
+
This Source Code Form is subject to the terms of the Mozilla Public
|
| 359 |
+
License, v. 2.0. If a copy of the MPL was not distributed with this
|
| 360 |
+
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
| 361 |
+
|
| 362 |
+
If it is not possible or desirable to put the notice in a particular
|
| 363 |
+
file, then You may include the notice in a location (such as a LICENSE
|
| 364 |
+
file in a relevant directory) where a recipient would be likely to look
|
| 365 |
+
for such a notice.
|
| 366 |
+
|
| 367 |
+
You may add additional accurate notices of copyright ownership.
|
| 368 |
+
|
| 369 |
+
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
| 370 |
+
---------------------------------------------------------
|
| 371 |
+
|
| 372 |
+
This Source Code Form is "Incompatible With Secondary Licenses", as
|
| 373 |
+
defined by the Mozilla Public License, v. 2.0.
|
README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# XTTS streaming server
|
| 2 |
+
*Warning: XTTS-streaming-server doesn't support concurrent streaming requests. It's a demo server, not meant for production.*
|
| 3 |
+
|
| 4 |
+
https://github.com/coqui-ai/xtts-streaming-server/assets/17219561/7220442a-e88a-4288-8a73-608c4b39d06c
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
## 1) Run the server
|
| 8 |
+
|
| 9 |
+
### Use a pre-built image
|
| 10 |
+
|
| 11 |
+
CUDA 12.1:
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
$ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
CUDA 11.8 (for older cards):
|
| 18 |
+
|
| 19 |
+
```bash
|
| 20 |
+
$ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
CPU (not recommended):
|
| 24 |
+
|
| 25 |
+
```bash
|
| 26 |
+
$ docker run -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cpu
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
Run with a fine-tuned model:
|
| 30 |
+
|
| 31 |
+
Make sure the model folder `/path/to/model/folder` contains the following files:
|
| 32 |
+
- `config.json`
|
| 33 |
+
- `model.pth`
|
| 34 |
+
- `vocab.json`
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
$ docker run -v /path/to/model/folder:/app/tts_models --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
|
| 41 |
+
the terms of the [CPML license](https://coqui.ai/cpml). (Fine-tuned XTTS models also are under the [CPML license](https://coqui.ai/cpml))
|
| 42 |
+
|
| 43 |
+
### Build the image yourself
|
| 44 |
+
|
| 45 |
+
To build the Docker container with PyTorch 2.1 and CUDA 11.8:
|
| 46 |
+
|
| 47 |
+
`DOCKERFILE` may be `Dockerfile`, `Dockerfile.cpu`, `Dockerfile.cuda121`, or your own custom Dockerfile.
|
| 48 |
+
|
| 49 |
+
```bash
|
| 50 |
+
$ git clone git@github.com:coqui-ai/xtts-streaming-server.git
|
| 51 |
+
$ cd xtts-streaming-server/server
|
| 52 |
+
$ docker build -t xtts-stream . -f DOCKERFILE
|
| 53 |
+
$ docker run --gpus all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 xtts-stream
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
|
| 57 |
+
the terms of the [CPML license](https://coqui.ai/cpml). (Fine-tuned XTTS models also are under the [CPML license](https://coqui.ai/cpml))
|
| 58 |
+
|
| 59 |
+
## 2) Testing the running server
|
| 60 |
+
|
| 61 |
+
Once your Docker container is running, you can test that it's working properly. You will need to run the following code from a fresh terminal.
|
| 62 |
+
|
| 63 |
+
### Clone `xtts-streaming-server` if you haven't already
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
$ git clone git@github.com:coqui-ai/xtts-streaming-server.git
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
### Using the gradio demo
|
| 70 |
+
|
| 71 |
+
```bash
|
| 72 |
+
$ cd xtts-streaming-server
|
| 73 |
+
$ python -m pip install -r test/requirements.txt
|
| 74 |
+
$ python demo.py
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### Using the test script
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
$ cd xtts-streaming-server/test
|
| 81 |
+
$ python -m pip install -r requirements.txt
|
| 82 |
+
$ python test_streaming.py
|
| 83 |
+
```
|
demo.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
import base64
|
| 4 |
+
import tempfile
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Base URL of the running XTTS streaming server.
SERVER_URL = 'http://localhost:8000'
# Root folder for everything the demo writes to disk.
OUTPUT = "./demo_outputs"
# Maps a cloned speaker's name to the JSON payload returned by the server.
cloned_speakers = {}

print("Preparing file structure...")
# makedirs(exist_ok=True) also repairs a partially created output tree
# (OUTPUT present but a sub-folder missing), which a plain mkdir sequence
# guarded by "OUTPUT does not exist" would leave broken until a later write
# failed.
os.makedirs(os.path.join(OUTPUT, "cloned_speakers"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT, "generated_audios"), exist_ok=True)

print("Loading existing cloned speakers...")
for file in os.listdir(os.path.join(OUTPUT, "cloned_speakers")):
    if file.endswith(".json"):
        with open(os.path.join(OUTPUT, "cloned_speakers", file), "r") as fp:
            # The speaker name is the file name without its ".json" suffix.
            cloned_speakers[file[:-5]] = json.load(fp)
print("Available cloned speakers:", ", ".join(cloned_speakers.keys()))

try:
    print("Getting metadata from server ...")
    # NOTE: the misspelt name LANUGAGES is kept on purpose; other parts of
    # this script read it under that name.
    LANUGAGES = requests.get(SERVER_URL + "/languages").json()
    print("Available languages:", ", ".join(LANUGAGES))
    STUDIO_SPEAKERS = requests.get(SERVER_URL + "/studio_speakers").json()
    print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
except Exception as err:
    # Catch Exception rather than using a bare except so Ctrl-C / SystemExit
    # still propagate, and chain the cause so the real failure stays visible.
    raise Exception("Please make sure the server is running first.") from err
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
    """Clone a speaker from a reference recording and register it.

    Uploads the audio file to the server's ``/clone_speaker`` endpoint,
    writes the JSON it returns to ``<OUTPUT>/cloned_speakers/<name>.json``
    and stores it in the module-level ``cloned_speakers`` mapping.

    Args:
        upload_file: Path of the reference audio file to upload.
        clone_speaker_name: Name under which the cloned speaker is saved.
        cloned_speaker_names: List of known cloned speaker names; the new
            name is appended to it in place if not already present.

    Returns:
        A tuple ``(upload_file, clone_speaker_name, cloned_speaker_names,
        dropdown_update)`` where ``dropdown_update`` refreshes the choices of
        the cloned-speaker dropdown.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Keep the upload inside a context manager so the audio file handle is
    # closed as soon as the request has been sent.
    with open(upload_file, "rb") as wav_fp:
        files = {"wav_file": ("reference.wav", wav_fp)}
        response = requests.post(SERVER_URL + "/clone_speaker", files=files)
    # Fail loudly instead of persisting an error payload as a speaker.
    response.raise_for_status()
    embeddings = response.json()

    speaker_path = os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json")
    with open(speaker_path, "w") as fp:
        json.dump(embeddings, fp)

    cloned_speakers[clone_speaker_name] = embeddings
    # Re-cloning an existing name overwrites it; avoid a duplicate entry in
    # the dropdown choices.
    if clone_speaker_name not in cloned_speaker_names:
        cloned_speaker_names.append(clone_speaker_name)
    return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown.update(choices=cloned_speaker_names)
|
| 44 |
+
|
| 45 |
+
def tts(text, speaker_type, speaker_name_studio, speaker_name_custom, lang):
    """Synthesize ``text`` through the server and save the audio to disk.

    Args:
        text: Text to synthesize.
        speaker_type: ``'Studio'`` selects a speaker from ``STUDIO_SPEAKERS``;
            any other value selects one from ``cloned_speakers``.
        speaker_name_studio: Key into ``STUDIO_SPEAKERS`` (used for 'Studio').
        speaker_name_custom: Key into ``cloned_speakers`` (used otherwise).
        lang: Language code forwarded to the server.

    Returns:
        Path of the written ``.wav`` file under ``<OUTPUT>/generated_audios``.

    Raises:
        KeyError: If the selected speaker name is unknown.
        requests.HTTPError: If the server answers with an error status.
    """
    if speaker_type == 'Studio':
        embeddings = STUDIO_SPEAKERS[speaker_name_studio]
    else:
        embeddings = cloned_speakers[speaker_name_custom]

    response = requests.post(
        SERVER_URL + "/tts",
        json={
            "text": text,
            "language": lang,
            "speaker_embedding": embeddings["speaker_embedding"],
            "gpt_cond_latent": embeddings["gpt_cond_latent"]
        }
    )
    # Do not try to base64-decode an error page.
    response.raise_for_status()
    # The response body is base64 text; decode before creating the output
    # file so a malformed body does not leave an empty file behind.
    audio_bytes = base64.b64decode(response.content)

    # mkstemp is the public, race-free way to get a unique file name; it
    # replaces the private tempfile._get_candidate_names() helper. The target
    # folder is derived from OUTPUT instead of a second hard-coded path.
    fd, generated_audio_path = tempfile.mkstemp(
        suffix=".wav",
        dir=os.path.join(OUTPUT, "generated_audios"),
    )
    with os.fdopen(fd, "wb") as fp:
        fp.write(audio_bytes)
    return generated_audio_path
|
| 60 |
+
|
| 61 |
+
with gr.Blocks() as demo:
|
| 62 |
+
cloned_speaker_names = gr.State(list(cloned_speakers.keys()))
|
| 63 |
+
with gr.Tab("TTS"):
|
| 64 |
+
with gr.Column() as row4:
|
| 65 |
+
with gr.Row() as col4:
|
| 66 |
+
speaker_name_studio = gr.Dropdown(
|
| 67 |
+
label="Studio speaker",
|
| 68 |
+
choices=STUDIO_SPEAKERS.keys(),
|
| 69 |
+
value="Asya Anara" if "Asya Anara" in STUDIO_SPEAKERS.keys() else None,
|
| 70 |
+
)
|
| 71 |
+
speaker_name_custom = gr.Dropdown(
|
| 72 |
+
label="Cloned speaker",
|
| 73 |
+
choices=cloned_speaker_names.value,
|
| 74 |
+
value=cloned_speaker_names.value[0] if len(cloned_speaker_names.value) != 0 else None,
|
| 75 |
+
)
|
| 76 |
+
speaker_type = gr.Dropdown(label="Speaker type", choices=["Studio", "Cloned"], value="Studio")
|
| 77 |
+
with gr.Column() as col2:
|
| 78 |
+
lang = gr.Dropdown(label="Language", choices=LANUGAGES, value="en")
|
| 79 |
+
text = gr.Textbox(label="text", value="A quick brown fox jumps over the lazy dog.")
|
| 80 |
+
tts_button = gr.Button(value="TTS")
|
| 81 |
+
with gr.Column() as col3:
|
| 82 |
+
generated_audio = gr.Audio(label="Generated audio", autoplay=True)
|
| 83 |
+
with gr.Tab("Clone a new speaker"):
|
| 84 |
+
with gr.Column() as col1:
|
| 85 |
+
upload_file = gr.Audio(label="Upload reference audio", type="filepath")
|
| 86 |
+
clone_speaker_name = gr.Textbox(label="Speaker name", value="default_speaker")
|
| 87 |
+
clone_button = gr.Button(value="Clone speaker")
|
| 88 |
+
|
| 89 |
+
clone_button.click(
|
| 90 |
+
fn=clone_speaker,
|
| 91 |
+
inputs=[upload_file, clone_speaker_name, cloned_speaker_names],
|
| 92 |
+
outputs=[upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom],
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
tts_button.click(
|
| 96 |
+
fn=tts,
|
| 97 |
+
inputs=[text, speaker_type, speaker_name_studio, speaker_name_custom, lang],
|
| 98 |
+
outputs=[generated_audio],
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
if __name__ == "__main__":
    # Send one synthesis request with the bundled default speaker before the
    # UI starts; the script aborts here if the server rejects it.
    print("Warming up server...")
    with open("test/default_speaker.json", "r") as speaker_file:
        warmup_speaker = json.load(speaker_file)

    warmup_payload = {
        "text": "This is a warmup request.",
        "language": "en",
        "speaker_embedding": warmup_speaker["speaker_embedding"],
        "gpt_cond_latent": warmup_speaker["gpt_cond_latent"],
    }
    resp = requests.post(SERVER_URL + "/tts", json=warmup_payload)
    resp.raise_for_status()

    print("Starting the demo...")
    # Listen on all interfaces, port 3009, without a public share link.
    demo.launch(share=False, debug=False, server_port=3009, server_name="0.0.0.0")
|
server/Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# XTTS streaming server image on the CUDA 11.8 PyTorch base.
FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-devel
ARG DEBIAN_FRONTEND=noninteractive

# Audio tooling and build dependencies. The apt package lists are removed in
# the same layer so they are not baked into the image.
RUN apt-get update && \
    apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
    apt-get -y autoremove && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements.txt .
RUN python -m pip install --use-deprecated=legacy-resolver -r requirements.txt \
    && python -m pip cache purge

RUN python -m unidic download
# Default location main.py checks for a custom model (CUSTOM_MODEL_PATH).
RUN mkdir -p /app/tts_models

COPY main.py .
ENV NVIDIA_DISABLE_REQUIRE=1

# Read by main.py to set the PyTorch thread count.
ENV NUM_THREADS=2
EXPOSE 80
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
|
server/Dockerfile.cpu
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CPU-only XTTS streaming server image.
FROM python:3.11.7
ARG DEBIAN_FRONTEND=noninteractive

# Audio tooling and build dependencies. The apt package lists are removed in
# the same layer so they are not baked into the image.
RUN apt-get update && \
    apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
    apt-get -y autoremove && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements_cpu.txt .
RUN python -m pip install --use-deprecated=legacy-resolver -r requirements_cpu.txt \
    && python -m pip cache purge

RUN python -m unidic download
# Default location main.py checks for a custom model (CUSTOM_MODEL_PATH).
RUN mkdir -p /app/tts_models

COPY main.py .
# Any value other than "0" makes main.py select the CPU device.
ENV USE_CPU=1

EXPOSE 80
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
|
server/Dockerfile.cuda121
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# XTTS streaming server image on the CUDA 12.1 PyTorch base.
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel
ARG DEBIAN_FRONTEND=noninteractive

# Audio tooling and build dependencies. The apt package lists are removed in
# the same layer so they are not baked into the image.
RUN apt-get update && \
    apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
    apt-get -y autoremove && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements.txt .
RUN python -m pip install --use-deprecated=legacy-resolver -r requirements.txt \
    && python -m pip cache purge

RUN python -m unidic download
# Default location main.py checks for a custom model (CUSTOM_MODEL_PATH).
RUN mkdir -p /app/tts_models

COPY main.py .

# Set this to 1 if you have an older card.
ENV NVIDIA_DISABLE_REQUIRE=0

# Read by main.py to set the PyTorch thread count.
ENV NUM_THREADS=2
EXPOSE 80
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
|
server/main.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import io
|
| 3 |
+
import os
|
| 4 |
+
import tempfile
|
| 5 |
+
import wave
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import List
|
| 9 |
+
from pydantic import BaseModel
|
| 10 |
+
|
| 11 |
+
from fastapi import FastAPI, UploadFile, Body
|
| 12 |
+
from fastapi.responses import StreamingResponse
|
| 13 |
+
|
| 14 |
+
from TTS.tts.configs.xtts_config import XttsConfig
|
| 15 |
+
from TTS.tts.models.xtts import Xtts
|
| 16 |
+
from TTS.utils.generic_utils import get_user_data_dir
|
| 17 |
+
from TTS.utils.manage import ModelManager
|
| 18 |
+
|
| 19 |
+
# NUM_THREADS overrides the thread count; otherwise use the CPU count, falling
# back to 1 because os.cpu_count() may return None.
torch.set_num_threads(int(os.environ.get("NUM_THREADS", os.cpu_count() or 1)))
# USE_CPU=0 (the default) selects CUDA; any other value selects the CPU.
device = torch.device("cuda" if os.environ.get("USE_CPU", "0") == "0" else "cpu")
# Compare device.type: a torch.device does not compare equal to a plain
# string, so `device == "cuda"` was always False and this check never fired.
if device.type == "cuda" and not torch.cuda.is_available():
    raise RuntimeError("CUDA device unavailable, please use Dockerfile.cpu instead.")
|
| 23 |
+
|
| 24 |
+
# A custom model is used when CUSTOM_MODEL_PATH (default /app/tts_models)
# contains a config.json; otherwise the stock XTTS v2 model is downloaded.
custom_model_path = os.environ.get("CUSTOM_MODEL_PATH", "/app/tts_models")

if os.path.isfile(os.path.join(custom_model_path, "config.json")):
    model_path = custom_model_path
    print("Loading custom model from", model_path, flush=True)
else:
    print("Loading default model", flush=True)
    model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    print("Downloading XTTS Model:", model_name, flush=True)
    ModelManager().download_model(model_name)
    model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
    print("XTTS Model downloaded", flush=True)

print("Loading XTTS", flush=True)
config = XttsConfig()
config.load_json(os.path.join(model_path, "config.json"))
model = Xtts.init_from_config(config)
# Use device.type here: comparing the torch.device object itself with "cuda"
# is always False, which silently disabled DeepSpeed on GPU.
model.load_checkpoint(
    config,
    checkpoint_dir=model_path,
    eval=True,
    use_deepspeed=device.type == "cuda",
)
model.to(device)
print("XTTS Loaded.", flush=True)
|
| 44 |
+
|
| 45 |
+
print("Running XTTS Server ...", flush=True)

# FastAPI application object; the interactive docs are mounted at "/".
app = FastAPI(
    docs_url="/",
    version="0.0.1",
    title="XTTS Streaming server",
    description="XTTS Streaming server",
)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@app.post("/clone_speaker")
def predict_speaker(wav_file: UploadFile):
    """Compute conditioning inputs from reference audio file.

    The upload is spooled to a temporary file because the model is given a
    file path. The file is closed before the model reads it, so all bytes are
    on disk, and it is removed afterwards.

    Returns:
        A dict with ``gpt_cond_latent`` and ``speaker_embedding`` as
        half-precision nested lists.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        temp.write(wav_file.file.read())
        temp_audio_name = temp.name
    try:
        with torch.inference_mode():
            gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
                temp_audio_name
            )
    finally:
        # Do not leave one stray file behind per request.
        os.remove(temp_audio_name)
    return {
        "gpt_cond_latent": gpt_cond_latent.cpu().squeeze().half().tolist(),
        "speaker_embedding": speaker_embedding.cpu().squeeze().half().tolist(),
    }
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def postprocess(wav):
    """Turn a model waveform into clipped 16-bit integer samples.

    A list of tensors is first concatenated along dim 0. The result is a
    NumPy ``int16`` array with one extra leading axis of length 1; values are
    clipped to [-1, 1] and scaled by 32767.
    """
    if isinstance(wav, list):
        wav = torch.cat(wav, dim=0)
    samples = wav.clone().detach().cpu().numpy()
    samples = samples[None, : int(samples.shape[0])]
    scaled = np.clip(samples, -1, 1) * 32767
    return scaled.astype(np.int16)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def encode_audio_common(
    frame_input, encode_base64=True, sample_rate=24000, sample_width=2, channels=1
):
    """Wrap raw PCM frames in an in-memory WAV file.

    Returns the WAV file as a base64 string when ``encode_base64`` is true,
    otherwise as raw bytes.
    """
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as writer:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(sample_rate)
        writer.writeframes(frame_input)

    wav_bytes = buffer.getvalue()
    if not encode_base64:
        return wav_bytes
    return base64.b64encode(wav_bytes).decode("utf-8")
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class StreamingInputs(BaseModel):
    """Request body accepted by the ``/tts_stream`` endpoint."""

    # Speaker conditioning values; /clone_speaker and /studio_speakers return
    # entries under these same two keys.
    speaker_embedding: List[float]
    # Reshaped to rows of 1024 values before being passed to the model.
    gpt_cond_latent: List[List[float]]
    text: str
    language: str
    # When true, a WAV header is emitted before the first audio chunk.
    add_wav_header: bool = True
    # Numeric string; converted with int() before being passed to the model.
    stream_chunk_size: str = "20"
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def predict_streaming_generator(parsed_input: StreamingInputs = Body(...)):
    """Yield synthesized audio for ``parsed_input`` as raw byte chunks.

    Args:
        parsed_input: Validated request model; its fields are read by
            attribute, so it is annotated as ``StreamingInputs`` rather than
            ``dict``. The ``Body(...)`` default is kept only so the signature
            stays unchanged.

    Yields:
        An empty-payload WAV header first when ``add_wav_header`` is set,
        then one block of 16-bit PCM bytes per chunk produced by the model.
    """
    speaker_embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
    gpt_cond_latent = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)

    chunks = model.inference_stream(
        parsed_input.text,
        parsed_input.language,
        gpt_cond_latent,
        speaker_embedding,
        stream_chunk_size=int(parsed_input.stream_chunk_size),
        enable_text_splitting=True,
    )

    for i, chunk in enumerate(chunks):
        pcm = postprocess(chunk)
        # The header is sent once, ahead of the first chunk of samples.
        if i == 0 and parsed_input.add_wav_header:
            yield encode_audio_common(b"", encode_base64=False)
        yield pcm.tobytes()
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
@app.post("/tts_stream")
def predict_streaming_endpoint(parsed_input: StreamingInputs):
    """Stream the audio generated for the request as an ``audio/wav`` response."""
    audio_chunks = predict_streaming_generator(parsed_input)
    return StreamingResponse(audio_chunks, media_type="audio/wav")
|
| 144 |
+
|
| 145 |
+
class TTSInputs(BaseModel):
    """Request body accepted by the ``/tts`` endpoint."""

    # Speaker conditioning values; /clone_speaker and /studio_speakers return
    # entries under these same two keys.
    speaker_embedding: List[float]
    # Reshaped to rows of 1024 values before being passed to the model.
    gpt_cond_latent: List[List[float]]
    text: str
    language: str
|
| 150 |
+
|
| 151 |
+
@app.post("/tts")
def predict_speech(parsed_input: TTSInputs):
    """Synthesize the full utterance and return it as a base64-encoded WAV string."""
    embedding = torch.tensor(parsed_input.speaker_embedding)
    embedding = embedding.unsqueeze(0).unsqueeze(-1)
    latent = torch.tensor(parsed_input.gpt_cond_latent)
    latent = latent.reshape((-1, 1024)).unsqueeze(0)

    result = model.inference(
        parsed_input.text,
        parsed_input.language,
        latent,
        embedding,
    )

    pcm = postprocess(torch.tensor(result["wav"]))
    return encode_audio_common(pcm.tobytes())
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@app.get("/studio_speakers")
def get_speakers():
    """Return the model's built-in speakers keyed by speaker name.

    Each entry holds ``speaker_embedding`` and ``gpt_cond_latent`` as
    half-precision nested lists. An empty dict is returned when the model
    exposes no ``speaker_manager.speakers``.
    """
    if not (hasattr(model, "speaker_manager") and hasattr(model.speaker_manager, "speakers")):
        return {}

    studio_speakers = {}
    for name in model.speaker_manager.speakers.keys():
        entry = model.speaker_manager.speakers[name]
        embedding = entry["speaker_embedding"].cpu().squeeze().half().tolist()
        latent = model.speaker_manager.speakers[name]["gpt_cond_latent"].cpu().squeeze().half().tolist()
        studio_speakers[name] = {
            "speaker_embedding": embedding,
            "gpt_cond_latent": latent,
        }
    return studio_speakers
|
| 182 |
+
|
| 183 |
+
@app.get("/languages")
def get_languages():
    """Return the ``languages`` entry of the loaded XTTS config."""
    return config.languages
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
|
| 2 |
+
uvicorn[standard]==0.23.2
|
| 3 |
+
fastapi==0.95.2
|
| 4 |
+
deepspeed==0.10.3
|
| 5 |
+
pydantic==1.10.13
|
| 6 |
+
python-multipart==0.0.6
|
| 7 |
+
typing-extensions>=4.8.0
|
| 8 |
+
numpy==1.24.3
|
| 9 |
+
cutlet
|
| 10 |
+
mecab-python3==1.0.6
|
| 11 |
+
unidic-lite==1.0.8
|
| 12 |
+
unidic==1.1.0
|
server/requirements_cpu.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
|
| 2 |
+
uvicorn[standard]==0.23.2
|
| 3 |
+
fastapi==0.95.2
|
| 4 |
+
pydantic==1.10.13
|
| 5 |
+
python-multipart==0.0.6
|
| 6 |
+
typing-extensions>=4.8.0
|
| 7 |
+
numpy==1.24.3
|
| 8 |
+
cutlet
|
| 9 |
+
mecab-python3==1.0.6
|
| 10 |
+
unidic-lite==1.0.8
|
| 11 |
+
unidic==1.1.0
|
test/default_speaker.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
test/requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
requests==2.31.0
|
| 2 |
+
gradio==3.50.2
|
test/test_streaming.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
import shutil
|
| 4 |
+
import subprocess
|
| 5 |
+
import sys
|
| 6 |
+
import time
|
| 7 |
+
from typing import Iterator
|
| 8 |
+
|
| 9 |
+
import requests
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def is_installed(lib_name: str) -> bool:
    """Return True when ``lib_name`` resolves to an executable via ``shutil.which``."""
    return shutil.which(lib_name) is not None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def save(audio: bytes, filename: str) -> None:
    """Write ``audio`` to ``filename`` in binary mode, replacing any existing content."""
    with open(filename, "wb") as sink:
        sink.write(audio)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def stream_ffplay(audio_stream, output_file, save=True):
    """Pipe audio chunks into ffplay (playback) or ffmpeg (write to file).

    Args:
        audio_stream: Iterable of byte chunks; ``None`` items are skipped.
        output_file: Output path handed to ffmpeg when ``save`` is true.
        save: When true, run ffmpeg and write ``output_file``; otherwise play
            the audio with ffplay.
    """
    if not save:
        ffplay_cmd = ["ffplay", "-nodisp", "-probesize", "1024", "-autoexit", "-"]
    else:
        print("Saving to ", output_file)
        ffplay_cmd = ["ffmpeg", "-probesize", "1024", "-i", "-", output_file]

    ffplay_proc = subprocess.Popen(ffplay_cmd, stdin=subprocess.PIPE)
    try:
        for chunk in audio_stream:
            if chunk is not None:
                ffplay_proc.stdin.write(chunk)
    finally:
        # Always signal EOF and reap the child, even when the stream raises
        # part-way through (for example SystemExit from the HTTP error path).
        try:
            ffplay_proc.stdin.close()
        finally:
            ffplay_proc.wait()
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def tts(text, speaker, language, server_url, stream_chunk_size) -> Iterator[bytes]:
    """Request streamed speech from ``/tts_stream`` and yield the audio bytes.

    Args:
        text: Text to synthesize.
        speaker: Dict of speaker conditioning values; it is copied into the
            request payload and no longer modified in place.
        language: Language code sent to the server.
        server_url: Base URL of the server.
        stream_chunk_size: Sent as-is; you can reduce it to get a faster
            response, at the cost of quality.

    Yields:
        Non-empty chunks of the response body.

    Timing information is printed to stderr. The process exits with status 1
    when the server does not answer with HTTP 200.
    """
    start = time.perf_counter()
    payload = {
        **speaker,
        "text": text,
        "language": language,
        "stream_chunk_size": stream_chunk_size,
    }
    res = requests.post(
        f"{server_url}/tts_stream",
        json=payload,
        stream=True,
    )
    end = time.perf_counter()
    print(f"Time to make POST: {end-start}s", file=sys.stderr)

    if res.status_code != 200:
        print("Error:", res.text)
        sys.exit(1)

    try:
        first = True
        for chunk in res.iter_content(chunk_size=512):
            if first:
                end = time.perf_counter()
                print(f"Time to first chunk: {end-start}s", file=sys.stderr)
                first = False
            if chunk:
                yield chunk
    finally:
        # Release the streamed connection even if the consumer stops early.
        res.close()

    print("⏱️ response.elapsed:", res.elapsed)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_speaker(ref_audio, server_url):
    """Upload a reference audio file to ``/clone_speaker`` and return the parsed JSON.

    Args:
        ref_audio: Path of the reference audio file.
        server_url: Base URL of the server.

    Raises:
        requests.HTTPError: When the server answers with an error status, so
            an error body is not mistaken for speaker data.
    """
    # The context manager closes the file handle once the upload is done.
    with open(ref_audio, "rb") as audio_file:
        files = {"wav_file": ("reference.wav", audio_file)}
        response = requests.post(f"{server_url}/clone_speaker", files=files)
    response.raise_for_status()
    return response.json()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
if __name__ == "__main__":
    # Command-line client: synthesize --text via the streaming endpoint and
    # either play it or save it to --output_file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        default="It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
        help="text input for TTS"
    )
    parser.add_argument(
        "--language",
        default="en",
        help="Language to use default is 'en' (English)"
    )
    parser.add_argument(
        "--output_file",
        default=None,
        help="Save TTS output to given filename"
    )
    parser.add_argument(
        "--ref_file",
        default=None,
        help="Reference audio file to use, when not given will use default"
    )
    parser.add_argument(
        "--server_url",
        default="http://localhost:8000",
        help="Server url http://localhost:8000 default, change to your server location "
    )
    parser.add_argument(
        "--stream_chunk_size",
        default="20",
        help="Stream chunk size , 20 default, reducing will get faster latency but may degrade quality"
    )
    args = parser.parse_args()

    # Only read the bundled default speaker when no reference audio is given;
    # otherwise it would be loaded and immediately discarded.
    if args.ref_file is not None:
        print("Computing the latents for a new reference...")
        speaker = get_speaker(args.ref_file, args.server_url)
    else:
        with open("./default_speaker.json", "r") as file:
            speaker = json.load(file)

    stream_ffplay(
        tts(
            args.text,
            speaker,
            args.language,
            args.server_url,
            args.stream_chunk_size
        ),
        args.output_file,
        save=bool(args.output_file)
    )
|