Spaces:
Running
on
Zero
Running
on
Zero
Upload 11 files
Browse files- .gitignore +11 -0
- .python-version +1 -0
- LICENSE +201 -0
- README.md +81 -13
- gradio_app.py +446 -0
- main.py +75 -0
- pyproject.toml +28 -0
- requirements.txt +14 -0
- server_manager.py +468 -0
- tts_server.py +1165 -0
- uv.lock +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
output_audio/
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
README.md
CHANGED
|
@@ -1,13 +1,81 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎙️ VieNeu-TTS API Server
|
| 2 |
+
|
| 3 |
+
**Vietnamese Text-to-Speech API với 10 giọng nói tự nhiên**
|
| 4 |
+
|
| 5 |
+
## 🚀 Quick Start
|
| 6 |
+
|
| 7 |
+
### 1. Khởi động Server
|
| 8 |
+
```bash
|
| 9 |
+
start_server.bat
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
### 2. Test API
|
| 13 |
+
```bash
|
| 14 |
+
curl -X POST "http://127.0.0.1:8000/fast-tts" \
|
| 15 |
+
-H "Content-Type: application/json" \
|
| 16 |
+
-d '{
|
| 17 |
+
"text": "Xin chào, tôi là VieNeu-TTS",
|
| 18 |
+
"voice_choice": "Tuyên (nam miền Bắc)",
|
| 19 |
+
"return_base64": true
|
| 20 |
+
}'
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
### 3. Sử dụng trong Python
|
| 24 |
+
```python
|
| 25 |
+
import requests
|
| 26 |
+
import base64
|
| 27 |
+
|
| 28 |
+
# Gửi request
|
| 29 |
+
response = requests.post("http://127.0.0.1:8000/fast-tts", json={
|
| 30 |
+
"text": "Xin chào từ VieNeu-TTS",
|
| 31 |
+
"voice_choice": "Ngọc (nữ miền Bắc)",
|
| 32 |
+
"return_base64": True
|
| 33 |
+
})
|
| 34 |
+
|
| 35 |
+
# Lưu audio
|
| 36 |
+
if response.status_code == 200:
|
| 37 |
+
result = response.json()
|
| 38 |
+
audio_bytes = base64.b64decode(result["audio_base64"])
|
| 39 |
+
with open("output.wav", "wb") as f:
|
| 40 |
+
f.write(audio_bytes)
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
## 📡 API Endpoints
|
| 44 |
+
|
| 45 |
+
| Endpoint | Method | Description |
|
| 46 |
+
|----------|--------|-------------|
|
| 47 |
+
| `/fast-tts` | POST | **TTS nhanh** (recommended) |
|
| 48 |
+
| `/tts` | POST | TTS standard |
|
| 49 |
+
| `/bulk-tts` | POST | Xử lý batch (max 50) |
|
| 50 |
+
| `/voices` | GET | Danh sách giọng nói |
|
| 51 |
+
| `/health` | GET | Health check |
|
| 52 |
+
| `/docs` | GET | **API Documentation** |
|
| 53 |
+
|
| 54 |
+
## 🎭 Giọng nói có sẵn
|
| 55 |
+
|
| 56 |
+
- **Nam miền Bắc**: Tuyên, Bình
|
| 57 |
+
- **Nam miền Nam**: Vĩnh, Nguyên, Sơn
|
| 58 |
+
- **Nữ miền Bắc**: Ngọc, Ly
|
| 59 |
+
- **Nữ miền Nam**: Đoan, Dung
|
| 60 |
+
- **Đặc biệt**: Nhỏ Ngọt Ngào
|
| 61 |
+
|
| 62 |
+
## 🔧 Cấu hình
|
| 63 |
+
|
| 64 |
+
- **Server**: http://127.0.0.1:8000
|
| 65 |
+
- **GUI**: Tích hợp sẵn khi chạy server
|
| 66 |
+
- **Performance**: Có thể điều chỉnh từ giao diện
|
| 67 |
+
- **Cache**: Tự động cache giọng nói
|
| 68 |
+
|
| 69 |
+
## 📚 Tài liệu đầy đủ
|
| 70 |
+
|
| 71 |
+
Xem [API_DOCUMENTATION.md](API_DOCUMENTATION.md) để biết chi tiết.
|
| 72 |
+
|
| 73 |
+
## 💡 Tips
|
| 74 |
+
|
| 75 |
+
1. **Dùng `/fast-tts`** thay vì `/tts`
|
| 76 |
+
2. **Set `return_base64=true`** để tránh file I/O
|
| 77 |
+
3. **Cache kết quả** theo text + voice + speed
|
| 78 |
+
4. **Dùng cùng giọng** cho requests liên tiếp
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
**🚀 Bắt đầu ngay với `start_server.bat`!**
|
gradio_app.py
ADDED
|
@@ -0,0 +1,446 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import soundfile as sf
|
| 3 |
+
import tempfile
|
| 4 |
+
import torch
|
| 5 |
+
from vieneu_tts import VieNeuTTS
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import threading
|
| 9 |
+
import pickle
|
| 10 |
+
import hashlib
|
| 11 |
+
import numpy as np
|
| 12 |
+
from pydub import AudioSegment
|
| 13 |
+
|
| 14 |
+
# Startup banner (user-facing messages are intentionally kept in Vietnamese).
print("🎚️ Speed Control: Sử dụng Pydub (đơn giản, sạch, không vang)")
print("⏳ Đang khởi động VieNeu-TTS...")

# --- 1. SETUP MODEL ---
print("📦 Đang tải model...")
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"🖥️ Sử dụng thiết bị: {device.upper()}")

# Directory holding the on-disk cache of reference codes.
CACHE_DIR = "./reference_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

# In-memory cache of reference codes and the lock that guards it.
reference_cache_lock = threading.Lock()
reference_cache = {}
|
| 29 |
+
|
| 30 |
+
def get_cache_path(cache_key):
    """Return the pickle file path inside ``CACHE_DIR`` for ``cache_key``.

    The key is MD5-hashed so that arbitrary key text maps to a
    filesystem-safe file name.
    """
    digest = hashlib.md5(cache_key.encode()).hexdigest()
    file_name = f"{digest}.pkl"
    return os.path.join(CACHE_DIR, file_name)
|
| 35 |
+
|
| 36 |
+
def load_cache_from_disk(cache_key):
    """Load cached reference codes for ``cache_key`` from disk.

    Args:
        cache_key: The key whose cache file should be read.

    Returns:
        The unpickled object, or ``None`` when no cache file exists or the
        file cannot be read/unpickled (the cache is best-effort, so the
        caller is expected to re-encode in that case).
    """
    cache_path = get_cache_path(cache_key)
    try:
        with open(cache_path, 'rb') as f:
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # only acceptable while the cache directory is written solely by
            # this application; do not point it at untrusted files.
            return pickle.load(f)
    except FileNotFoundError:
        # No cache entry yet: the normal first-use case, nothing to report.
        return None
    except Exception as e:
        # Unpickling a damaged file can raise many exception types, so catch
        # broadly, but unlike a bare ``except`` this no longer swallows
        # KeyboardInterrupt/SystemExit, and the failure is made visible.
        print(f" ⚠️ Không thể đọc cache: {e}")
        return None
|
| 46 |
+
|
| 47 |
+
def save_cache_to_disk(cache_key, ref_codes):
    """Persist ``ref_codes`` for ``cache_key`` to the disk cache.

    Writes the pickled codes to the path given by ``get_cache_path`` and a
    sibling ``.txt`` file recording the key and creation time for debugging.
    Failures are reported with a warning and otherwise ignored, because the
    disk cache is only an optimisation.

    Args:
        cache_key: The key identifying the cached reference.
        ref_codes: The object to pickle.
    """
    cache_path = get_cache_path(cache_key)
    # Swap only the file extension; a plain str.replace('.pkl', '.txt') would
    # also rewrite any '.pkl' that happens to appear in a directory name.
    meta_path = os.path.splitext(cache_path)[0] + '.txt'
    try:
        with open(cache_path, 'wb') as f:
            pickle.dump(ref_codes, f)
        # Store metadata next to the pickle to make debugging easier
        with open(meta_path, 'w', encoding='utf-8') as f:
            f.write(f"Cache key: {cache_key}\n")
            f.write(f"Created: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
    except Exception as e:
        print(f" ⚠️ Không thể lưu cache: {e}")
|
| 60 |
+
|
| 61 |
+
try:
    tts = VieNeuTTS(
        backbone_repo="pnnbao-ump/VieNeu-TTS",
        backbone_device=device,
        codec_repo="neuphonic/neucodec",
        codec_device=device
    )
    print("✅ Model đã tải xong!")

    # Report the device each sub-model actually ended up on
    for _label, _attr in (("Backbone", "backbone"), ("Codec", "codec")):
        if not hasattr(tts, _attr):
            continue
        _part = getattr(tts, _attr)
        if hasattr(_part, 'device'):
            _part_device = _part.device
        else:
            # No ``device`` attribute: read it from the first parameter
            _part_device = next(_part.parameters()).device
        print(f" 📍 {_label} device: {_part_device}")

    # Without CUDA an empty line is printed instead of the memory report
    if torch.cuda.is_available():
        _gpu_report = f" 💾 GPU Memory allocated: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB"
    else:
        _gpu_report = ""
    print(_gpu_report)

except Exception as e:
    print(f"⚠️ Không thể tải model (Chế độ UI Demo): {e}")

    class MockTTS:
        """Stand-in used when the real model cannot be loaded (UI demo mode)."""

        def encode_reference(self, path):
            return None

        def infer(self, text, ref, ref_text):
            import numpy as np
            # Simulate latency so the timing display can be exercised
            time.sleep(1.5)
            sample_count = 24000 * 3
            return np.random.uniform(-0.5, 0.5, sample_count)

    tts = MockTTS()
|
| 96 |
+
|
| 97 |
+
# --- 2. DATA ---
|
| 98 |
+
# Display names of the preset voices, in the order they are registered.
# Add a new voice by appending its name here; it is mapped to
# ./sample/<name>.wav and ./sample/<name>.txt below.
_PRESET_VOICE_NAMES = (
    "Tuyên (nam miền Bắc)",
    "Vĩnh (nam miền Nam)",
    "Bình (nam miền Bắc)",
    "Nguyên (nam miền Nam)",
    "Sơn (nam miền Nam)",
    "Đoan (nữ miền Nam)",
    "Ngọc (nữ miền Bắc)",
    "Ly (nữ miền Bắc)",
    "Dung (nữ miền Nam)",
    "Nhỏ Ngọt Ngào",
)

# Voice name -> paths of its reference audio and reference transcript.
VOICE_SAMPLES = {
    name: {"audio": f"./sample/{name}.wav", "text": f"./sample/{name}.txt"}
    for name in _PRESET_VOICE_NAMES
}
|
| 113 |
+
|
| 114 |
+
# --- 3. HELPER FUNCTIONS ---
|
| 115 |
+
def load_reference_info(voice_choice):
    """Look up the reference audio path and transcript of a preset voice.

    Returns a ``(audio_path, text)`` pair:
    - ``(None, "")`` when ``voice_choice`` is not a key of ``VOICE_SAMPLES``;
    - ``(audio_path, transcript)`` when the transcript file exists;
    - ``(audio_path, warning)`` when the transcript file is missing;
    - ``(None, error message)`` when reading the transcript fails.
    """
    sample = VOICE_SAMPLES.get(voice_choice)
    if sample is None:
        return None, ""

    audio_path = sample["audio"]
    text_path = sample["text"]
    try:
        if not os.path.exists(text_path):
            return audio_path, "⚠️ Không tìm thấy file text mẫu."
        with open(text_path, "r", encoding="utf-8") as f:
            transcript = f.read()
        return audio_path, transcript
    except Exception as e:
        return None, f"❌ Lỗi: {e!s}"
|
| 129 |
+
|
| 130 |
+
def _get_reference_codes(cache_key, ref_audio_path):
    """Return the encoded reference for ``ref_audio_path``, using the caches.

    Lookup order is the in-memory ``reference_cache``, then the disk cache,
    and finally ``tts.encode_reference``. A fresh encoding is stored in both
    caches. The whole lookup runs while holding ``reference_cache_lock``.
    """
    with reference_cache_lock:
        # Check memory cache first
        if cache_key in reference_cache:
            print(f" ✨ Using memory cache for {cache_key}")
            return reference_cache[cache_key]

        # Check disk cache
        ref_codes = load_cache_from_disk(cache_key)
        if ref_codes is not None:
            print(f" 💿 Loaded from disk cache for {cache_key}")
            reference_cache[cache_key] = ref_codes
            return ref_codes

        # Nothing cached: encode now and remember the result
        print(f" 🔄 Encoding reference (first time for {cache_key})...")
        ref_codes = tts.encode_reference(ref_audio_path)
        reference_cache[cache_key] = ref_codes
        save_cache_to_disk(cache_key, ref_codes)
        print(f" 💾 Saved to disk cache for {cache_key}")
        return ref_codes


def _apply_speed(wav, speed_factor):
    """Return ``wav`` re-timed by ``speed_factor`` using Pydub.

    The samples are written to a temporary WAV file at 24000 Hz, reloaded
    with Pydub, reinterpreted at ``frame_rate * speed_factor`` and converted
    back to 24000 Hz. The temporary file is removed even if a step fails.
    """
    # Reserve a path only; the handle is closed before the file is written.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        tmp_path = tmp.name
    try:
        sf.write(tmp_path, wav, 24000)

        # Pydub method: same raw samples, different declared frame rate
        sound = AudioSegment.from_wav(tmp_path)
        new_frame_rate = int(sound.frame_rate * speed_factor)
        sound_stretched = sound._spawn(sound.raw_data, overrides={'frame_rate': new_frame_rate})
        sound_stretched = sound_stretched.set_frame_rate(24000)

        # NOTE(review): the 32768 scale presumes 16-bit samples — confirm
        stretched = np.array(sound_stretched.get_array_of_samples()).astype(np.float32) / 32768.0
        if sound_stretched.channels == 2:
            # Down-mix interleaved stereo to mono by averaging the channels
            stretched = stretched.reshape((-1, 2)).mean(axis=1)
        return stretched
    finally:
        # Previously the file leaked whenever writing or Pydub raised.
        os.unlink(tmp_path)


def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab, speed_factor):
    """Synthesize ``text`` with a preset or custom reference voice.

    Args:
        text: Text to synthesize; must be non-blank and at most 250 characters.
        voice_choice: Key of ``VOICE_SAMPLES`` (used unless in custom mode).
        custom_audio: Path of the uploaded reference audio (custom mode).
        custom_text: Transcript of ``custom_audio`` (custom mode).
        mode_tab: ``"custom_mode"`` selects the custom reference; any other
            value selects a preset voice.
        speed_factor: Playback speed multiplier; ``1.0`` leaves audio as is.

    Returns:
        ``(output_path, message)``. ``output_path`` is the path of a WAV file
        on success and ``None`` on any validation failure or error;
        ``message`` is the user-facing status text. Exceptions are caught,
        their traceback printed, and reported through ``message``.
    """
    try:
        if not text or not text.strip():
            return None, "⚠️ Vui lòng nhập văn bản cần tổng hợp!"

        # --- LENGTH LIMIT: 250 characters ---
        if len(text) > 250:
            return None, f"❌ Văn bản quá dài ({len(text)}/250 ký tự)! Vui lòng cắt ngắn lại để đảm bảo chất lượng."

        # Choose the reference audio and its transcript
        if mode_tab == "custom_mode":
            if custom_audio is None or not custom_text:
                return None, "⚠️ Vui lòng tải lên Audio và nhập nội dung Audio đó."
            ref_audio_path = custom_audio
            ref_text_raw = custom_text
            print("🎨 Mode: Custom Voice")
        else:  # Preset
            if voice_choice not in VOICE_SAMPLES:
                return None, "⚠️ Vui lòng chọn một giọng mẫu."
            ref_audio_path = VOICE_SAMPLES[voice_choice]["audio"]
            ref_text_path = VOICE_SAMPLES[voice_choice]["text"]

            if not os.path.exists(ref_audio_path):
                return None, f"❌ Không tìm thấy file audio: {ref_audio_path}"

            with open(ref_text_path, "r", encoding="utf-8") as f:
                ref_text_raw = f.read()
            print(f"🎤 Mode: Preset Voice ({voice_choice})")

        # Inference and timing
        print(f"📝 Text: {text[:50]}...")

        start_time = time.time()

        # Encode reference (memory cache, then disk cache, then the model)
        t1 = time.time()
        cache_key = f"{mode_tab}:{voice_choice}" if mode_tab == "preset_mode" else ref_audio_path
        ref_codes = _get_reference_codes(cache_key, ref_audio_path)
        t2 = time.time()
        print(f" ⏱️ Encode reference: {t2-t1:.2f}s")

        # Inference
        t3 = time.time()
        wav = tts.infer(text, ref_codes, ref_text_raw)
        t4 = time.time()
        print(f" ⏱️ Inference: {t4-t3:.2f}s")

        # Apply speed adjustment if needed (using Pydub - simple & clean)
        if speed_factor != 1.0:
            t5 = time.time()
            print(f" 🎚️ Applying speed adjustment: {speed_factor}x")
            print(" 🔧 Using Pydub (simple & clean)")
            wav = _apply_speed(wav, speed_factor)
            t6 = time.time()
            print(f" ⏱️ Speed adjustment: {t6-t5:.2f}s")

        end_time = time.time()
        process_time = end_time - start_time

        # Save; delete=False because the path is returned to the caller
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            sf.write(tmp_file.name, wav, 24000)
            output_path = tmp_file.name

        speed_info = f" (Speed: {speed_factor}x)" if speed_factor != 1.0 else ""
        return output_path, f"✅ Thành công! (Mất {process_time:.2f} giây để tạo){speed_info}"

    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, f"❌ Lỗi hệ thống: {str(e)}"
|
| 237 |
+
|
| 238 |
+
# --- 4. UI SETUP ---
|
| 239 |
+
theme = gr.themes.Ocean(
|
| 240 |
+
primary_hue="indigo",
|
| 241 |
+
secondary_hue="cyan",
|
| 242 |
+
neutral_hue="slate",
|
| 243 |
+
font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui'],
|
| 244 |
+
).set(
|
| 245 |
+
button_primary_background_fill="linear-gradient(90deg, #6366f1 0%, #0ea5e9 100%)",
|
| 246 |
+
button_primary_background_fill_hover="linear-gradient(90deg, #4f46e5 0%, #0284c7 100%)",
|
| 247 |
+
block_shadow="0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)",
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
+
# <--- CSS ĐÃ SỬA (Background xanh đen + Chữ sáng)
|
| 251 |
+
css = """
|
| 252 |
+
.container { max-width: 1200px; margin: auto; }
|
| 253 |
+
.header-box {
|
| 254 |
+
text-align: center;
|
| 255 |
+
margin-bottom: 25px;
|
| 256 |
+
padding: 25px;
|
| 257 |
+
background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%); /* Xanh đen (Slate 900 -> 800) */
|
| 258 |
+
border-radius: 12px;
|
| 259 |
+
border: 1px solid #334155;
|
| 260 |
+
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.3);
|
| 261 |
+
}
|
| 262 |
+
.header-title {
|
| 263 |
+
font-size: 2.5rem;
|
| 264 |
+
font-weight: 800;
|
| 265 |
+
color: white; /* Chữ trắng */
|
| 266 |
+
background: -webkit-linear-gradient(45deg, #60A5FA, #22D3EE); /* Gradient xanh sáng cho chữ nổi bật */
|
| 267 |
+
-webkit-background-clip: text;
|
| 268 |
+
-webkit-text-fill-color: transparent;
|
| 269 |
+
margin-bottom: 10px;
|
| 270 |
+
}
|
| 271 |
+
.header-desc {
|
| 272 |
+
font-size: 1.1rem;
|
| 273 |
+
color: #cbd5e1; /* Màu xám sáng (Slate-300) */
|
| 274 |
+
margin-bottom: 15px;
|
| 275 |
+
}
|
| 276 |
+
.link-group a {
|
| 277 |
+
text-decoration: none;
|
| 278 |
+
margin: 0 10px;
|
| 279 |
+
font-weight: 600;
|
| 280 |
+
color: #94a3b8; /* Màu link sáng hơn chút */
|
| 281 |
+
transition: color 0.2s;
|
| 282 |
+
}
|
| 283 |
+
.link-group a:hover { color: #38bdf8; text-shadow: 0 0 5px rgba(56, 189, 248, 0.5); }
|
| 284 |
+
|
| 285 |
+
.status-box { font-weight: bold; text-align: center; border: none; background: transparent; }
|
| 286 |
+
.warning-note {
|
| 287 |
+
background-color: #fff7ed;
|
| 288 |
+
border-left: 4px solid #f97316;
|
| 289 |
+
padding: 12px;
|
| 290 |
+
color: #9a3412;
|
| 291 |
+
font-size: 0.9rem;
|
| 292 |
+
border-radius: 4px;
|
| 293 |
+
margin-top: 10px;
|
| 294 |
+
margin-bottom: 10px;
|
| 295 |
+
}
|
| 296 |
+
"""
|
| 297 |
+
|
| 298 |
+
# Quick-try examples for the UI: each row is [text to synthesize, preset voice name].
# The row order matches the inputs wired to gr.Examples (text box, voice dropdown).
EXAMPLES_LIST = [
    # Male voice, Southern accent
    ["Về miền Tây không chỉ để ngắm nhìn sông nước hữu tình, mà còn để cảm nhận tấm chân tình của người dân nơi đây. Cùng ngồi xuồng ba lá len lỏi qua rặng dừa nước, nghe câu vọng cổ ngọt ngào thì còn gì bằng.", "Vĩnh (nam miền Nam)"],

    # Male voice, Northern accent
    ["Hà Nội những ngày vào thu mang một vẻ đẹp trầm mặc và cổ kính đến lạ thường. Đi dạo quanh Hồ Gươm vào sáng sớm, hít hà mùi hoa sữa nồng nàn và thưởng thức chút cốm làng Vòng là trải nghiệm khó quên.", "Bình (nam miền Bắc)"],

    # Male voice, Northern accent
    ["Sự bùng nổ của trí tuệ nhân tạo đang định hình lại cách chúng ta làm việc và sinh sống. Từ xe tự lái đến trợ lý ảo thông minh, công nghệ đang dần xóa nhòa ranh giới giữa thực tại và những bộ phim viễn tưởng.", "Tuyên (nam miền Bắc)"],

    # Male voice, Southern accent
    ["Sài Gòn hối hả là thế, nhưng chỉ cần tấp vào một quán cà phê ven đường, gọi ly bạc xỉu đá và ngắm nhìn dòng người qua lại, bạn sẽ thấy thành phố này cũng có những khoảng lặng thật bình yên và đáng yêu.", "Nguyên (nam miền Nam)"],

    # Male voice, Southern accent
    ["Để đảm bảo tiến độ dự án quan trọng này, chúng ta cần tập trung tối đa nguồn lực và phối hợp chặt chẽ giữa các phòng ban. Mọi khó khăn phát sinh cần được báo cáo ngay lập tức để ban lãnh đạo xử lý kịp thời.", "Sơn (nam miền Nam)"],

    # Female voice, Southern accent
    ["Ngày xửa ngày xưa, ở một ngôi làng nọ có cô Tấm xinh đẹp, nết na nhưng sớm mồ côi mẹ. Dù bị mẹ kế và Cám hãm hại đủ đường, Tấm vẫn giữ được tấm lòng lương thiện và cuối cùng tìm được hạnh phúc xứng đáng.", "Đoan (nữ miền Nam)"],

    # Female voice, Northern accent
    ["Dạ em chào anh chị, hiện tại bên em đang có chương trình ưu đãi đặc biệt cho căn hộ hướng sông này. Với thiết kế hiện đại và không gian xanh mát, đây chắc chắn là tổ ấm lý tưởng mà gia đình mình đang tìm kiếm.", "Ly (nữ miền Bắc)"],

    # Female voice, Northern accent
    # (the word "rằng" was stored as U+FFFD replacement characters; restored here)
    ["Dưới cơn mưa phùn lất phất của những ngày cuối đông, em khẽ nép vào vai anh, cảm nhận hơi ấm lan tỏa. Những khoảnh khắc bình dị như thế này khiến em nhận ra rằng, hạnh phúc đôi khi chỉ đơn giản là được ở bên nhau.", "Ngọc (nữ miền Bắc)"],
]
|
| 323 |
+
|
| 324 |
+
with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS Studio") as demo:
|
| 325 |
+
|
| 326 |
+
with gr.Column(elem_classes="container"):
|
| 327 |
+
# Header - Cập nhật class cho HTML
|
| 328 |
+
gr.HTML("""
|
| 329 |
+
<div class="header-box">
|
| 330 |
+
<div class="header-title">🎙️ VieNeu-TTS Studio</div>
|
| 331 |
+
<div class="header-desc">
|
| 332 |
+
Phiên bản: VieNeu-TTS-1000h (model mới nhất, train trên 1000 giờ dữ liệu)
|
| 333 |
+
</div>
|
| 334 |
+
<div class="link-group">
|
| 335 |
+
<a href="https://huggingface.co/pnnbao-ump/VieNeu-TTS" target="_blank">🤗 Model Card</a> •
|
| 336 |
+
<a href="https://huggingface.co/datasets/pnnbao-ump/VieNeu-TTS-1000h" target="_blank">📖 Dataset 1000h</a> •
|
| 337 |
+
<a href="https://github.com/pnnbao97/VieNeu-TTS" target="_blank">🦜 GitHub</a>
|
| 338 |
+
</div>
|
| 339 |
+
</div>
|
| 340 |
+
""")
|
| 341 |
+
|
| 342 |
+
with gr.Row(elem_classes="container", equal_height=False):
|
| 343 |
+
|
| 344 |
+
# --- LEFT: INPUT ---
|
| 345 |
+
with gr.Column(scale=3, variant="panel"):
|
| 346 |
+
gr.Markdown("### 📝 Văn bản đầu vào")
|
| 347 |
+
text_input = gr.Textbox(
|
| 348 |
+
label="Nhập văn bản",
|
| 349 |
+
placeholder="Nhập nội dung tiếng Việt cần chuyển thành giọng nói...",
|
| 350 |
+
lines=4,
|
| 351 |
+
value="Sự bùng nổ của trí tuệ nhân tạo đang định hình lại cách chúng ta làm việc và sinh sống. Từ xe tự lái đến trợ lý ảo thông minh, công nghệ đang dần xóa nhòa ranh giới giữa thực tại và những bộ phim viễn tưởng.",
|
| 352 |
+
show_label=False
|
| 353 |
+
)
|
| 354 |
+
|
| 355 |
+
# Counter + Warning
|
| 356 |
+
with gr.Row():
|
| 357 |
+
char_count = gr.HTML("<div style='text-align: right; color: #64748B; font-size: 0.8rem;'>0 / 250 ký tự</div>")
|
| 358 |
+
|
| 359 |
+
gr.Markdown("### 🗣️ Chọn giọng đọc")
|
| 360 |
+
with gr.Tabs() as tabs:
|
| 361 |
+
with gr.TabItem("👤 Giọng có sẵn (Preset)", id="preset_mode"):
|
| 362 |
+
voice_select = gr.Dropdown(
|
| 363 |
+
choices=list(VOICE_SAMPLES.keys()),
|
| 364 |
+
value="Tuyên (nam miền Bắc)",
|
| 365 |
+
label="Danh sách giọng",
|
| 366 |
+
interactive=True
|
| 367 |
+
)
|
| 368 |
+
with gr.Accordion("Thông tin giọng mẫu", open=False):
|
| 369 |
+
ref_audio_preview = gr.Audio(label="Audio mẫu", interactive=False, type="filepath")
|
| 370 |
+
ref_text_preview = gr.Markdown("...")
|
| 371 |
+
|
| 372 |
+
with gr.TabItem("🎙️ Giọng tùy chỉnh (Custom)", id="custom_mode"):
|
| 373 |
+
gr.Markdown("Tải lên giọng của bạn (Zero-shot Cloning)")
|
| 374 |
+
custom_audio = gr.Audio(label="File ghi âm (.wav)", type="filepath")
|
| 375 |
+
custom_text = gr.Textbox(label="Nội dung ghi âm", placeholder="Nhập chính xác lời thoại...")
|
| 376 |
+
|
| 377 |
+
current_mode = gr.Textbox(visible=False, value="preset_mode")
|
| 378 |
+
|
| 379 |
+
gr.Markdown("### ⚙️ Cài đặt nâng cao")
|
| 380 |
+
speed_slider = gr.Slider(
|
| 381 |
+
minimum=0.5,
|
| 382 |
+
maximum=2.0,
|
| 383 |
+
value=1.0,
|
| 384 |
+
step=0.1,
|
| 385 |
+
label="Tốc độ giọng nói (Speed)",
|
| 386 |
+
info="0.5x = chậm, 1.0x = bình thường, 2.0x = nhanh"
|
| 387 |
+
)
|
| 388 |
+
|
| 389 |
+
btn_generate = gr.Button("Tổng hợp giọng nói", variant="primary", size="lg")
|
| 390 |
+
|
| 391 |
+
# --- RIGHT: OUTPUT ---
|
| 392 |
+
with gr.Column(scale=2):
|
| 393 |
+
gr.Markdown("### 🎧 Kết quả")
|
| 394 |
+
with gr.Group():
|
| 395 |
+
audio_output = gr.Audio(label="Audio đầu ra", type="filepath", show_download_button=True, autoplay=True)
|
| 396 |
+
status_output = gr.Textbox(label="Trạng thái", show_label=False, elem_classes="status-box", placeholder="Sẵn sàng...")
|
| 397 |
+
|
| 398 |
+
# --- EXAMPLES ---
|
| 399 |
+
with gr.Row(elem_classes="container"):
|
| 400 |
+
with gr.Column():
|
| 401 |
+
gr.Markdown("### 📚 Ví dụ mẫu")
|
| 402 |
+
gr.Examples(examples=EXAMPLES_LIST, inputs=[text_input, voice_select], label="Thử nghiệm nhanh")
|
| 403 |
+
|
| 404 |
+
# --- LOGIC ---
|
| 405 |
+
def update_count(text):
    """Render the "<count> / 250" character counter as an HTML snippet.

    Args:
        text: Current content of the text box. ``None`` is counted as empty.

    Returns:
        A right-aligned ``<div>`` string. The colour and wording escalate:
        gray up to 200 characters, orange from 201 to 250, and red with a
        warning once the count exceeds 250.
    """
    limit = 250
    warn_threshold = 200

    # Guard against a None value (e.g. an unset component) instead of
    # letting len() raise TypeError.
    count = len(text or "")

    if count > limit:
        color = "#dc2626"  # Red
        msg = f"⚠️ <b>{count} / {limit}</b> - Quá giới hạn!"
    elif count > warn_threshold:
        color = "#ea580c"  # Orange
        msg = f"{count} / {limit}"
    else:
        color = "#64748B"  # Gray
        msg = f"{count} / {limit} ký tự"
    return f"<div style='text-align: right; color: {color}; font-size: 0.8rem; font-weight: bold'>{msg}</div>"
|
| 417 |
+
|
| 418 |
+
text_input.change(update_count, text_input, char_count)
|
| 419 |
+
|
| 420 |
+
def update_ref_preview(voice):
    """Return the preview pair for the selected preset voice.

    Looks the voice up with ``load_reference_info`` and returns its audio
    value unchanged together with the reference text formatted as an
    italic Markdown block quote.
    """
    sample_audio, sample_text = load_reference_info(voice)
    quoted_markdown = f'> *"{sample_text}"*'
    return sample_audio, quoted_markdown
|
| 423 |
+
|
| 424 |
+
voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
|
| 425 |
+
demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
|
| 426 |
+
|
| 427 |
+
# Tab handling - FIXED WITH *ARGS
|
| 428 |
+
tab_preset = tabs.children[0]
|
| 429 |
+
tab_custom = tabs.children[1]
|
| 430 |
+
|
| 431 |
+
# Dùng *args để nhận bất kỳ số lượng tham số nào (0 hoặc 1), tránh lỗi Warning
|
| 432 |
+
tab_preset.select(fn=lambda *args: "preset_mode", inputs=None, outputs=current_mode)
|
| 433 |
+
tab_custom.select(fn=lambda *args: "custom_mode", inputs=None, outputs=current_mode)
|
| 434 |
+
|
| 435 |
+
btn_generate.click(
|
| 436 |
+
fn=synthesize_speech,
|
| 437 |
+
inputs=[text_input, voice_select, custom_audio, custom_text, current_mode, speed_slider],
|
| 438 |
+
outputs=[audio_output, status_output]
|
| 439 |
+
)
|
| 440 |
+
|
| 441 |
+
if __name__ == "__main__":
|
| 442 |
+
demo.queue(default_concurrency_limit=20).launch(
|
| 443 |
+
server_name="127.0.0.1",
|
| 444 |
+
server_port=7860,
|
| 445 |
+
share=False
|
| 446 |
+
)
|
main.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from vieneu_tts import VieNeuTTS
|
| 2 |
+
import soundfile as sf
|
| 3 |
+
import torch
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 7 |
+
|
| 8 |
+
input_texts = [
|
| 9 |
+
"Các khóa học trực tuyến đang giúp học sinh tiếp cận kiến thức mọi lúc mọi nơi. Giáo viên sử dụng video, bài tập tương tác và thảo luận trực tuyến để nâng cao hiệu quả học tập.",
|
| 10 |
+
|
| 11 |
+
"Các nghiên cứu về bệnh Alzheimer cho thấy tác dụng tích cực của các bài tập trí não và chế độ dinh dưỡng lành mạnh, giúp giảm tốc độ suy giảm trí nhớ ở người cao tuổi.",
|
| 12 |
+
|
| 13 |
+
"Một tiểu thuyết trinh thám hiện đại dẫn dắt độc giả qua những tình tiết phức tạp, bí ẩn, kết hợp yếu tố tâm lý sâu sắc khiến người đọc luôn hồi hộp theo dõi diễn biến câu chuyện.",
|
| 14 |
+
|
| 15 |
+
"Các nhà khoa học nghiên cứu gen người phát hiện những đột biến mới liên quan đến bệnh di truyền. Điều này giúp nâng cao khả năng chẩn đoán và điều trị.",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
output_dir = "./output_audio"
|
| 19 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 20 |
+
|
| 21 |
+
def main(backbone="pnnbao-ump/VieNeu-TTS", codec="neuphonic/neucodec"):
    """
    Synthesize every entry of ``input_texts`` using one reference voice.

    In the sample directory, there are wav files and txt files with matching names.
    These are pre-prepared reference files for testing with Vietnamese names:
    - Bình (nam miền Bắc) - Male, North accent
    - Tuyên (nam miền Bắc) - Male, North accent
    - Nguyên (nam miền Nam) - Male, South accent
    - Sơn (nam miền Nam) - Male, South accent
    - Vĩnh (nam miền Nam) - Male, South accent
    - Hương (nữ miền Bắc) - Female, North accent
    - Ly (nữ miền Bắc) - Female, North accent
    - Ngọc (nữ miền Bắc) - Female, North accent
    - Đoan (nữ miền Nam) - Female, South accent
    - Dung (nữ miền Nam) - Female, South accent

    Note: The model can clone any voice you provide (with corresponding text).
    However, quality may not match the sample files. For best results, finetune
    the model on your target voice. See finetune guide at:
    https://github.com/pnnbao-ump/VieNeuTTS/blob/main/finetune.ipynb

    Args:
        backbone: Repository id passed to ``VieNeuTTS`` as ``backbone_repo``.
        codec: Repository id passed to ``VieNeuTTS`` as ``codec_repo``.

    Returns:
        None. Each generated clip is written to ``output_dir`` as
        ``output_<i>.wav`` (1-based index) at a 24000 Hz sample rate.
    """
    # Male voice (South accent)
    ref_audio_path = "./sample/Vĩnh (nam miền Nam).wav"
    ref_text_path = "./sample/Vĩnh (nam miền Nam).txt"

    # Female voice (South accent) - uncomment to use
    # ref_audio_path = "./sample/Đoan (nữ miền Nam).wav"
    # ref_text_path = "./sample/Đoan (nữ miền Nam).txt"

    # Read the transcript through a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(ref_text_path, "r", encoding="utf-8") as ref_text_file:
        ref_text_raw = ref_text_file.read()

    if not ref_audio_path or not ref_text_raw:
        print("No reference audio or text provided.")
        return None

    # Initialize VieNeuTTS-1000h
    tts = VieNeuTTS(
        backbone_repo=backbone,
        backbone_device=device,
        codec_repo=codec,
        codec_device=device
    )

    print("Encoding reference audio...")
    # The reference is encoded once and reused for every input text.
    ref_codes = tts.encode_reference(ref_audio_path)

    # Generate speech for all input texts
    for i, text in enumerate(input_texts, 1):
        print(f"Generating audio {i}/{len(input_texts)}: {text[:50]}...")
        wav = tts.infer(text, ref_codes, ref_text_raw)
        output_path = os.path.join(output_dir, f"output_{i}.wav")
        sf.write(output_path, wav, 24000)
        print(f"✓ Saved to {output_path}")
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
|
| 75 |
+
main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.uv]
|
| 2 |
+
index-strategy = "unsafe-best-match"
|
| 3 |
+
|
| 4 |
+
[[tool.uv.index]]
|
| 5 |
+
name = "pytorch"
|
| 6 |
+
url = "https://download.pytorch.org/whl/cu118"
|
| 7 |
+
|
| 8 |
+
[project]
|
| 9 |
+
name = "VieNeu-TTS"
|
| 10 |
+
version = "0.1.0"
|
| 11 |
+
description = "VieNeu-TTS - TTS for Vietnamese language"
|
| 12 |
+
readme = "README.md"
|
| 13 |
+
requires-python = ">=3.12"
|
| 14 |
+
dependencies = [
|
| 15 |
+
"phonemizer>=3.3.0",
|
| 16 |
+
"torch",
|
| 17 |
+
"torchvision",
|
| 18 |
+
"torchaudio",
|
| 19 |
+
"huggingface-hub[cli]>=0.36.0",
|
| 20 |
+
"neucodec>=0.0.4",
|
| 21 |
+
"librosa>=0.11.0",
|
| 22 |
+
"gradio>=5.49.1",
|
| 23 |
+
]
|
| 24 |
+
|
| 25 |
+
[tool.uv.sources]
|
| 26 |
+
torch = { index = "pytorch" }
|
| 27 |
+
torchvision = { index = "pytorch" }
|
| 28 |
+
torchaudio = { index = "pytorch" }
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
phonemizer>=3.3.0
|
| 2 |
+
torch
|
| 3 |
+
torchvision
|
| 4 |
+
torchaudio
|
| 5 |
+
gradio
|
| 6 |
+
neucodec>=0.0.4
|
| 7 |
+
librosa>=0.11.0
|
| 8 |
+
fastapi
|
| 9 |
+
uvicorn
|
| 10 |
+
soundfile
|
| 11 |
+
pydub
|
| 12 |
+
aiohttp
|
| 13 |
+
aiofiles
|
| 14 |
+
pygame
|
server_manager.py
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
TTS Server Manager - Giao diện quản lý server với điều chỉnh thông số real-time
|
| 3 |
+
"""
|
| 4 |
+
import tkinter as tk
|
| 5 |
+
from tkinter import ttk, messagebox
|
| 6 |
+
import requests
|
| 7 |
+
import threading
|
| 8 |
+
import time
|
| 9 |
+
import json
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
import subprocess
|
| 12 |
+
import os
|
| 13 |
+
import signal
|
| 14 |
+
|
| 15 |
+
class ServerManager:
|
| 16 |
+
def __init__(self, root):
    """Configure the main window, build the tabs and start monitoring.

    Args:
        root: The Tk root window this manager draws into.
    """
    window = self.root = root
    window.title("VieNeu-TTS Server Manager")
    window.geometry("800x600")

    # Where the TTS server is expected to listen, and the child process
    # handle (None until start_server launches one).
    self.server_url = "http://127.0.0.1:8000"
    self.server_process = None

    # Last applied async settings; the values mirror the spinbox defaults.
    self.current_settings = dict(
        gpu_semaphore=2,
        cpu_semaphore=4,
        io_semaphore=6,
        thread_pool=6,
    )

    self.setup_ui()
    self.start_monitoring()
|
| 35 |
+
|
| 36 |
+
def setup_ui(self):
    """Create the top-level notebook and populate its three tabs."""
    tabs = ttk.Notebook(self.root)
    tabs.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

    # Tab order: server control, async settings, monitor.
    tab_builders = (
        self.setup_server_tab,
        self.setup_settings_tab,
        self.setup_monitor_tab,
    )
    for build_tab in tab_builders:
        build_tab(tabs)
|
| 49 |
+
|
| 50 |
+
def setup_server_tab(self, notebook):
    """Build the "Server Control" tab.

    The tab holds a status label, the start/stop/restart buttons (kept on
    ``self`` so other methods can toggle them) and a row of quick actions.
    """
    tab = ttk.Frame(notebook)
    notebook.add(tab, text="Server Control")

    # Shared pack options for every button that is followed by another one.
    spaced_left = {"side": tk.LEFT, "padx": (0, 10)}

    # --- Status line ---
    status_box = ttk.LabelFrame(tab, text="Server Status", padding="10")
    status_box.pack(fill=tk.X, pady=(0, 10))
    self.status_label = ttk.Label(status_box, text="Checking server...", font=("Arial", 12))
    self.status_label.pack()

    # --- Start / stop / restart ---
    controls_box = ttk.LabelFrame(tab, text="Server Controls", padding="10")
    controls_box.pack(fill=tk.X, pady=(0, 10))
    button_row = ttk.Frame(controls_box)
    button_row.pack()

    self.start_btn = ttk.Button(button_row, text="Start Server", command=self.start_server)
    self.start_btn.pack(**spaced_left)

    # Stop starts out disabled; start_server enables it.
    self.stop_btn = ttk.Button(button_row, text="Stop Server", command=self.stop_server, state="disabled")
    self.stop_btn.pack(**spaced_left)

    self.restart_btn = ttk.Button(button_row, text="Restart Server", command=self.restart_server)
    self.restart_btn.pack(side=tk.LEFT)

    # --- Quick actions ---
    quick_box = ttk.LabelFrame(tab, text="Quick Actions", padding="10")
    quick_box.pack(fill=tk.X, pady=(0, 10))

    quick_actions = (
        ("Open API Docs", self.open_api_docs),
        ("Test Server", self.test_server),
        ("Clear Cache", self.clear_cache),
    )
    last_index = len(quick_actions) - 1
    for index, (caption, handler) in enumerate(quick_actions):
        action_btn = ttk.Button(quick_box, text=caption, command=handler)
        if index < last_index:
            action_btn.pack(**spaced_left)
        else:
            # The final button gets no trailing gap.
            action_btn.pack(side=tk.LEFT)
|
| 85 |
+
|
| 86 |
+
def setup_settings_tab(self, notebook):
    """Build the "Async Settings" tab.

    Creates the read-only current-settings text box, one spinbox per
    tunable (each backed by a ``tk.StringVar`` stored on ``self``), the
    apply/reset/info buttons and the four preset buttons.
    """
    tab = ttk.Frame(notebook)
    notebook.add(tab, text="Async Settings")

    # --- Read-only view of the current settings ---
    current_box = ttk.LabelFrame(tab, text="Current Settings", padding="10")
    current_box.pack(fill=tk.X, pady=(0, 10))
    self.current_settings_text = tk.Text(current_box, height=4, width=60, state="disabled")
    self.current_settings_text.pack()

    # --- Spinboxes ---
    adjust_box = ttk.LabelFrame(tab, text="Adjust Settings", padding="10")
    adjust_box.pack(fill=tk.X, pady=(0, 10))

    # (label, attribute receiving the StringVar, default, min, max, gap below)
    spin_rows = (
        ("GPU Semaphore (1-4):", "gpu_var", "2", 1, 4, 5),
        ("CPU Semaphore (2-8):", "cpu_var", "4", 2, 8, 5),
        ("I/O Semaphore (3-10):", "io_var", "6", 3, 10, 5),
        ("Thread Pool (2-12):", "thread_var", "6", 2, 12, 10),
    )
    for caption, attr_name, default, low, high, gap_below in spin_rows:
        row = ttk.Frame(adjust_box)
        row.pack(fill=tk.X, pady=(0, gap_below))
        ttk.Label(row, text=caption).pack(side=tk.LEFT)
        value_var = tk.StringVar(value=default)
        setattr(self, attr_name, value_var)
        ttk.Spinbox(row, from_=low, to=high, width=10, textvariable=value_var).pack(side=tk.RIGHT)

    # --- Apply / reset / preset info ---
    action_row = ttk.Frame(adjust_box)
    action_row.pack(fill=tk.X)

    ttk.Button(action_row, text="Apply Settings", command=self.apply_settings).pack(side=tk.LEFT, padx=(0, 10))
    ttk.Button(action_row, text="Reset to Default", command=self.reset_settings).pack(side=tk.LEFT, padx=(0, 10))
    ttk.Button(action_row, text="Load Presets", command=self.show_presets).pack(side=tk.LEFT)

    # --- One-click presets: (gpu, cpu, io, threads) ---
    presets_box = ttk.LabelFrame(tab, text="Performance Presets", padding="10")
    presets_box.pack(fill=tk.X)
    preset_row = ttk.Frame(presets_box)
    preset_row.pack()

    presets = (
        ("Light (1,2,3,3)", (1, 2, 3, 3)),
        ("Balanced (2,4,6,6)", (2, 4, 6, 6)),
        ("Performance (3,6,8,8)", (3, 6, 8, 8)),
        ("Max (4,8,10,12)", (4, 8, 10, 12)),
    )
    last_index = len(presets) - 1
    for index, (caption, values) in enumerate(presets):
        # Bind values as a default argument so each button keeps its own preset.
        preset_btn = ttk.Button(
            preset_row,
            text=caption,
            command=lambda values=values: self.apply_preset(*values),
        )
        if index < last_index:
            preset_btn.pack(side=tk.LEFT, padx=(0, 5))
        else:
            preset_btn.pack(side=tk.LEFT)
|
| 153 |
+
|
| 154 |
+
def setup_monitor_tab(self, notebook):
    """Build the "Monitor" tab.

    Contains a read-only resource-usage box, a scrollable read-only
    activity log, an auto-refresh checkbox and a manual refresh button.
    """
    tab = ttk.Frame(notebook)
    notebook.add(tab, text="Monitor")

    # --- Resource usage ---
    usage_box = ttk.LabelFrame(tab, text="Resource Usage", padding="10")
    usage_box.pack(fill=tk.X, pady=(0, 10))
    usage_view = tk.Text(usage_box, height=8, width=70, state="disabled")
    self.resource_text = usage_view
    usage_view.pack()

    # --- Activity log with a vertical scrollbar ---
    activity_box = ttk.LabelFrame(tab, text="Recent Activity", padding="10")
    activity_box.pack(fill=tk.BOTH, expand=True)

    log_view = tk.Text(activity_box, height=15, width=70, state="disabled")
    self.activity_text = log_view
    vbar = ttk.Scrollbar(activity_box, orient="vertical", command=log_view.yview)
    log_view.configure(yscrollcommand=vbar.set)

    log_view.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    vbar.pack(side=tk.RIGHT, fill=tk.Y)

    # --- Refresh controls ---
    footer = ttk.Frame(tab)
    footer.pack(fill=tk.X, pady=(10, 0))

    self.auto_refresh_var = tk.BooleanVar(value=True)
    ttk.Checkbutton(footer, text="Auto Refresh", variable=self.auto_refresh_var).pack(side=tk.LEFT)
    ttk.Button(footer, text="Refresh Now", command=self.refresh_monitor).pack(side=tk.RIGHT)
|
| 184 |
+
|
| 185 |
+
def start_server(self):
    """Launch ``tts_server.py`` as a child process and watch its output.

    Does nothing except warn if a previously started child is still
    running. On success the Start button is disabled, the Stop button is
    enabled and a daemon thread begins reading the child's output. Any
    failure is reported in an error dialog rather than raised.
    """
    import sys

    try:
        if self.server_process and self.server_process.poll() is None:
            messagebox.showwarning("Warning", "Server is already running!")
            return

        self.log_activity("Starting TTS server...")

        # Use the interpreter running this manager (sys.executable) so the
        # server gets the same environment and installed packages; a bare
        # "python" may resolve to a different interpreter or not exist on PATH.
        # The script path is relative, so it is resolved against the current
        # working directory.
        self.server_process = subprocess.Popen(
            [sys.executable, "tts_server.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge stderr so one reader sees everything
            text=True,
            bufsize=1,  # line-buffered, so the reader thread gets whole lines
        )

        self.start_btn.config(state="disabled")
        self.stop_btn.config(state="normal")

        # Monitor server output
        threading.Thread(target=self.monitor_server_output, daemon=True).start()

        self.log_activity("Server starting... Please wait for initialization.")

    except Exception as e:
        messagebox.showerror("Error", f"Failed to start server: {str(e)}")
|
| 214 |
+
|
| 215 |
+
def stop_server(self):
    """Stop the child server process, escalating to a kill if needed.

    Sends ``terminate()`` and waits up to 10 seconds. If the process is
    still alive after that it is killed, so the manager never keeps a
    handle to a server it has reported as stopped. Afterwards the Start
    button is enabled and the Stop button disabled. Any unexpected
    failure is reported in an error dialog rather than raised.
    """
    try:
        process = self.server_process
        if process:
            self.log_activity("Stopping TTS server...")
            process.terminate()
            try:
                process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Graceful shutdown did not finish in time: force it, then
                # reap the process so it does not linger as a zombie.
                self.log_activity("Server did not exit within 10s; killing it.")
                process.kill()
                process.wait()
            self.server_process = None

        self.start_btn.config(state="normal")
        self.stop_btn.config(state="disabled")
        self.log_activity("Server stopped.")

    except Exception as e:
        messagebox.showerror("Error", f"Failed to stop server: {str(e)}")
|
| 230 |
+
|
| 231 |
+
def restart_server(self):
    """Stop the server, then start it again two seconds later.

    The delay is scheduled with ``root.after`` instead of ``time.sleep``
    so the Tk event loop keeps running (window stays responsive) while
    waiting. As a consequence this method returns before the new server
    process has been launched.
    """
    self.stop_server()
    # 2000 ms grace period before relaunching.
    self.root.after(2000, self.start_server)
|
| 236 |
+
|
| 237 |
+
def monitor_server_output(self):
    """Forward noteworthy lines of the server's console output to the log.

    Reads the child's stdout line by line until end-of-file and logs only
    lines containing one of the marker keywords. Intended to run in the
    background thread started by ``start_server``.
    """
    # Take a local reference once: stop_server may set self.server_process
    # to None while this loop is still running in another thread.
    process = self.server_process
    if not process:
        return

    # Only log important messages, not all console output
    keywords = ('✅', '❌', '🎤', '📊', 'ERROR', 'WARNING')

    try:
        # readline() returns '' only at EOF, i.e. once the child has closed
        # its stdout. Reading until then (rather than stopping as soon as
        # poll() reports an exit) keeps the last buffered lines, which are
        # usually the ones explaining why the server died.
        for line in iter(process.stdout.readline, ''):
            if any(keyword in line for keyword in keywords):
                # NOTE(review): this runs off the Tk main thread; confirm
                # log_activity is safe to call from a worker thread.
                self.log_activity(f"Server: {line.strip()}")

    except Exception as e:
        self.log_activity(f"Error monitoring server: {str(e)}")
|
| 254 |
+
|
| 255 |
+
def apply_settings(self):
    """Send the spinbox values to the server and record them on success.

    Posts the four integer settings to ``/admin/update_settings``. On an
    HTTP 200 response the values become ``self.current_settings``, the
    display is refreshed and a confirmation is shown. Any other status
    shows the response text as an error. A request failure shows a
    warning, and any other exception shows an error dialog.
    """
    try:
        # Setting key -> StringVar backing its spinbox, converted in this order.
        sources = (
            ("gpu_semaphore", self.gpu_var),
            ("cpu_semaphore", self.cpu_var),
            ("io_semaphore", self.io_var),
            ("thread_pool", self.thread_var),
        )
        new_settings = {key: int(var.get()) for key, var in sources}

        # Send settings to server via API
        endpoint = f"{self.server_url}/admin/update_settings"
        response = requests.post(endpoint, json=new_settings, timeout=5)

        if response.status_code != 200:
            messagebox.showerror("Error", f"Failed to apply settings: {response.text}")
            return

        self.current_settings = new_settings
        self.update_current_settings_display()
        summary = (
            "Settings applied: GPU({gpu_semaphore}) CPU({cpu_semaphore}) "
            "I/O({io_semaphore}) Threads({thread_pool})"
        ).format(**new_settings)
        self.log_activity(summary)
        messagebox.showinfo("Success", "Settings applied successfully!")

    except requests.exceptions.RequestException:
        messagebox.showwarning("Warning", "Server not responding. Settings will be applied on next restart.")
    except Exception as e:
        messagebox.showerror("Error", f"Failed to apply settings: {str(e)}")
|
| 280 |
+
|
| 281 |
+
def apply_preset(self, gpu, cpu, io, threads):
    """Copy a preset's four values into the form fields, then apply them.

    Each value is converted with ``str`` before being stored in its form
    variable; ``apply_settings`` is called once afterwards.
    """
    assignments = (
        (self.gpu_var, gpu),
        (self.cpu_var, cpu),
        (self.io_var, io),
        (self.thread_var, threads),
    )
    for variable, value in assignments:
        variable.set(str(value))
    self.apply_settings()
|
| 288 |
+
|
| 289 |
+
def reset_settings(self):
    """Put the default values back into the four form fields.

    Only the form variables are changed (GPU "2", CPU "4", I/O "6",
    threads "6"); nothing is sent to the server from here.
    """
    defaults = (
        ("gpu_var", "2"),
        ("cpu_var", "4"),
        ("io_var", "6"),
        ("thread_var", "6"),
    )
    for attribute, default in defaults:
        getattr(self, attribute).set(default)
|
| 295 |
+
|
| 296 |
+
def show_presets(self):
    """Show an information dialog that describes the four performance presets."""
    # The message starts and ends with a newline, exactly like the original
    # triple-quoted literal.
    lines = (
        "",
        "Performance Presets:",
        "",
        "• Light (1,2,3,3): Minimal resource usage, good for testing",
        "• Balanced (2,4,6,6): Default configuration, good performance",
        "• Performance (3,6,8,8): High performance, more resource usage",
        "• Max (4,8,10,12): Maximum performance, highest resource usage",
        "",
        "Note: Higher values = better performance but more resource usage",
        "",
    )
    messagebox.showinfo("Presets Info", "\n".join(lines))
|
| 309 |
+
|
| 310 |
+
def check_server_status(self):
    """Query ``/health`` and show the outcome in the status label.

    Returns:
        True when the server answered with HTTP 200; False for any other
        status code or when anything in the request/decoding raised.
    """
    try:
        response = requests.get(f"{self.server_url}/health", timeout=3)
        if response.status_code != 200:
            self.status_label.config(text="❌ Server Error", foreground="red")
            return False

        data = response.json()
        pieces = [f"✅ Server Online | Model: {data.get('model_status', 'unknown')}"]

        if 'async_resources' in data:
            async_info = data['async_resources']
            counters = (
                ("GPU", 'gpu_available'),
                ("CPU", 'cpu_available'),
                ("I/O", 'io_available'),
            )
            for label, key in counters:
                pieces.append(f" | {label}: {async_info.get(key, 0)}")

        self.status_label.config(text="".join(pieces), foreground="green")
        return True
    except Exception:
        # Any failure (connection, timeout, bad JSON, ...) is shown as offline.
        self.status_label.config(text="❌ Server Offline", foreground="red")
        return False
|
| 332 |
+
|
| 333 |
+
def update_current_settings_display(self):
    """Write ``self.current_settings`` into the settings text box.

    The box is switched to "normal" only for the duration of the rewrite and
    set back to "disabled" afterwards.
    """
    current = self.current_settings
    settings_text = "\n".join((
        f"GPU Semaphore: {current['gpu_semaphore']} (concurrent GPU tasks)",
        f"CPU Semaphore: {current['cpu_semaphore']} (concurrent CPU tasks)",
        f"I/O Semaphore: {current['io_semaphore']} (concurrent I/O tasks)",
        f"Thread Pool: {current['thread_pool']} (worker threads)",
    ))

    box = self.current_settings_text
    box.config(state="normal")
    box.delete(1.0, tk.END)
    box.insert(1.0, settings_text)
    box.config(state="disabled")
|
| 344 |
+
|
| 345 |
+
def refresh_monitor(self):
    """Fetch ``/status`` and rewrite the resource panel from the response.

    Nothing is displayed for a non-200 answer.  ``requests`` transport errors
    are ignored silently; any other exception is written to the activity log.
    """
    try:
        # Get detailed status
        response = requests.get(f"{self.server_url}/status", timeout=3)
        if response.status_code != 200:
            return

        data = response.json()
        report = ["=== RESOURCE USAGE ===\n"]

        if 'async_processing' in data:
            for resource, info in data['async_processing'].items():
                in_use = info['in_use']
                capacity = info['max_capacity']
                usage_pct = (in_use / capacity) * 100
                report.append(f"{resource}: {in_use}/{capacity} ({usage_pct:.0f}% used)\n")

        if 'model_info' in data:
            model_info = data['model_info']
            report.append(f"\nModel: {model_info.get('device', 'unknown')}\n")
            report.append(f"Cache: {model_info.get('reference_cache_size', 0)} voices cached\n")

        report.append(f"\nLast Updated: {datetime.now().strftime('%H:%M:%S')}")

        panel = self.resource_text
        panel.config(state="normal")
        panel.delete(1.0, tk.END)
        panel.insert(1.0, "".join(report))
        panel.config(state="disabled")

    except requests.exceptions.RequestException:
        # Don't log connection errors to avoid spam
        pass
    except Exception as e:
        self.log_activity(f"Monitor error: {str(e)}")
|
| 378 |
+
|
| 379 |
+
def clear_monitor_display(self):
    """Replace the resource panel's content with a "server offline" notice."""
    offline_info = (
        "=== SERVER OFFLINE ===\n\n"
        "Server is not running.\n"
        "Use 'Start Server' button to launch the server.\n\n"
        "Monitoring will resume when server is online."
    )

    panel = self.resource_text
    panel.config(state="normal")
    panel.delete(1.0, tk.END)
    panel.insert(1.0, offline_info)
    panel.config(state="disabled")
|
| 387 |
+
|
| 388 |
+
def log_activity(self, message, max_lines=100):
    """Append a timestamped entry to the activity log and drop the oldest lines.

    Args:
        message: Text to log; it is prefixed with ``[HH:MM:SS]`` and
            terminated with a newline.
        max_lines: How many of the most recent lines to keep (default 100).
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    log_message = f"[{timestamp}] {message}\n"

    log = self.activity_text
    log.config(state="normal")
    try:
        log.insert(tk.END, log_message)

        # Every entry ends with "\n", so "end-1c" (the position just before
        # the widget's own trailing newline) sits on the empty line after the
        # last entry: its line number minus one is the number of logged lines.
        # Counting this way avoids copying the whole log on every call and
        # avoids the two extra empty strings that split('\n') produced, which
        # made the old trim keep one line fewer than intended.
        logged_lines = int(log.index("end-1c").split(".")[0]) - 1
        overflow = logged_lines - max_lines
        if overflow > 0:
            log.delete("1.0", f"{overflow + 1}.0")

        log.see(tk.END)
    finally:
        # Always return the widget to read-only, even if an edit failed.
        log.config(state="disabled")
|
| 404 |
+
|
| 405 |
+
def start_monitoring(self):
    """Start the background polling thread and draw the settings panel once.

    The daemon thread checks the server every 10 seconds: when it is online
    and auto refresh is enabled the monitor is refreshed; when it is offline
    the monitor shows the offline notice.
    """
    # NOTE(review): the loop below runs in a worker thread and ends up calling
    # ``.config``/``.insert`` on what look like Tk widgets (status label,
    # resource text).  Tk widgets are normally only safe to touch from the
    # main thread - confirm and consider marshalling updates via ``after``.
    def monitor_loop():
        while True:
            server_online = self.check_server_status()

            if not server_online:
                # Clear monitor display when server is offline
                self.clear_monitor_display()
            elif self.auto_refresh_var.get():
                # Only refresh monitor if server is online and auto refresh is enabled
                self.refresh_monitor()

            time.sleep(10)  # Update every 10 seconds to reduce spam

    poller = threading.Thread(target=monitor_loop, daemon=True)
    poller.start()

    # Initial updates
    self.update_current_settings_display()
|
| 424 |
+
|
| 425 |
+
def open_api_docs(self):
    """Open the server's ``/docs`` page with the ``webbrowser`` module."""
    import webbrowser

    docs_url = f"{self.server_url}/docs"
    webbrowser.open(docs_url)
|
| 429 |
+
|
| 430 |
+
def test_server(self):
    """Send one short synthesis request to ``/tts`` and log how it went.

    Logs the reported processing time on HTTP 200, the status code on any
    other answer, and the exception text if the request raised.
    """
    payload = {
        "text": "Test server connection",
        "voice_choice": "Tuyên (nam miền Bắc)",
        "speed_factor": 1.0,
    }
    try:
        self.log_activity("Testing server...")
        response = requests.post(f"{self.server_url}/tts", json=payload, timeout=30)

        if response.status_code != 200:
            self.log_activity(f"❌ Server test failed: HTTP {response.status_code}")
            return

        result = response.json()
        elapsed = result.get('processing_time', 0)
        self.log_activity(f"✅ Server test successful! Processing time: {elapsed:.2f}s")

    except Exception as e:
        self.log_activity(f"❌ Server test error: {str(e)}")
|
| 450 |
+
|
| 451 |
+
def clear_cache(self):
    """Ask the server to clear its cache and log success, failure or the error."""
    try:
        response = requests.post(f"{self.server_url}/admin/clear_cache", timeout=5)
        cleared = response.status_code == 200
        outcome = "✅ Cache cleared successfully" if cleared else "❌ Failed to clear cache"
        self.log_activity(outcome)
    except Exception as e:
        self.log_activity(f"❌ Cache clear error: {str(e)}")
|
| 461 |
+
|
| 462 |
+
def main():
    """Create the Tk root window, build the ServerManager on it and run the event loop."""
    window = tk.Tk()
    # The manager is bound to a name so it stays referenced while the loop runs.
    manager = ServerManager(window)
    window.mainloop()


if __name__ == "__main__":
    main()
|
tts_server.py
ADDED
|
@@ -0,0 +1,1165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI TTS Server với Speed Control + Async Processing
|
| 3 |
+
"""
|
| 4 |
+
from fastapi import FastAPI, HTTPException
|
| 5 |
+
from fastapi.responses import FileResponse
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
from typing import List
|
| 8 |
+
import tempfile
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
import soundfile as sf
|
| 13 |
+
import numpy as np
|
| 14 |
+
from pydub import AudioSegment
|
| 15 |
+
import torch
|
| 16 |
+
from vieneu_tts import VieNeuTTS
|
| 17 |
+
import asyncio
|
| 18 |
+
import concurrent.futures
|
| 19 |
+
import threading
|
| 20 |
+
|
| 21 |
+
# ==========================================
# SETUP
# ==========================================
app = FastAPI(title="VieNeu-TTS API", version="1.0.0")

# Global variables
# Both start as None; /health reports the model as "not_loaded" while tts is None.
tts = None
device = None

# Async control - semaphores that limit how many tasks of each kind run at once.
# All three start as None here; POST /admin/update_settings rebinds them to
# asyncio.Semaphore objects.
# NOTE(review): the original comments promised limits of 1 / 3 / 5, but the
# SettingsUpdate defaults and the text returned by GET / both say 2 / 4 / 6 -
# confirm against the code that first creates these objects.
gpu_semaphore = None  # Bounds concurrent GPU tasks (reference encoding, inference)
cpu_semaphore = None  # Bounds concurrent CPU tasks (speed control)
io_semaphore = None  # Bounds concurrent I/O tasks (reading reference text, saving audio)

# Thread pool for blocking operations
thread_pool = None
|
| 37 |
+
|
| 38 |
+
# Voice samples
# Every voice is a pair of files under ./sample named after the voice itself:
# "<name>.wav" (reference audio) and "<name>.txt" (reference text).
_VOICE_NAMES = (
    "Tuyên (nam miền Bắc)",
    "Vĩnh (nam miền Nam)",
    "Bình (nam miền Bắc)",
    "Nguyên (nam miền Nam)",
    "Sơn (nam miền Nam)",
    "Đoan (nữ miền Nam)",
    "Ngọc (nữ miền Bắc)",
    "Ly (nữ miền Bắc)",
    "Dung (nữ miền Nam)",
    "Nhỏ Ngọt Ngào",
)
VOICE_SAMPLES = {
    name: {"audio": f"./sample/{name}.wav", "text": f"./sample/{name}.txt"}
    for name in _VOICE_NAMES
}

# Cache for reference codes
reference_cache = {}
|
| 54 |
+
|
| 55 |
+
# ==========================================
|
| 56 |
+
# MODELS
|
| 57 |
+
# ==========================================
|
| 58 |
+
class TTSRequest(BaseModel):
    """Request body for POST /tts."""

    # Text to synthesize; the /tts handler rejects empty text and text longer
    # than 500 characters.
    text: str
    # Voice name; the /tts handler requires it to be a key of VOICE_SAMPLES.
    voice_choice: str = "Tuyên (nam miền Bắc)"
    # Speed multiplier; 1.0 skips speed processing, and the /tts handler only
    # accepts values from 0.5 to 2.0.
    speed_factor: float = 1.0
|
| 62 |
+
|
| 63 |
+
class TTSResponse(BaseModel):
    """Response body of POST /tts."""

    # Relative URL of the generated file, of the form "/audio/<filename>".
    audio_url: str
    # The /tts handler sets this to "success".
    status: str
    # Wall-clock seconds between the start of the handler and the saved file.
    processing_time: float
    # Echo of the request's voice_choice.
    voice_used: str
    # Echo of the request's speed_factor.
    speed_applied: float
|
| 69 |
+
|
| 70 |
+
# ==========================================
|
| 71 |
+
# HELPER FUNCTIONS
|
| 72 |
+
# ==========================================
|
| 73 |
+
def apply_speed_control(audio, speed_factor, sample_rate=24000):
    """Change the playback speed of a waveform using pydub.

    The samples are written to a temporary WAV file, reloaded with pydub,
    re-labelled with a frame rate scaled by ``speed_factor`` and then
    resampled back to ``sample_rate``.

    Args:
        audio: Samples accepted by ``soundfile.write``.
        speed_factor: Speed multiplier; exactly 1.0 returns ``audio`` as is.
        sample_rate: Sample rate of ``audio`` and of the result (default
            24000, the value that used to be hard-coded).

    Returns:
        ``audio`` itself when ``speed_factor == 1.0``; otherwise a float32
        NumPy array (stereo is averaged down to one channel).

    Raises:
        ValueError: If ``speed_factor`` is not a positive number.
    """
    if speed_factor == 1.0:
        return audio

    # Written as "not > 0" so that NaN is rejected as well.
    if not speed_factor > 0:
        raise ValueError(f"speed_factor must be positive, got {speed_factor!r}")

    # Only reserve a file name here; the handle is closed before writing.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        tmp_path = tmp.name

    try:
        sf.write(tmp_path, audio, sample_rate)
        sound = AudioSegment.from_wav(tmp_path)

        # Re-label the same raw samples with a scaled frame rate, then
        # resample back to the target rate.
        new_frame_rate = int(sound.frame_rate * speed_factor)
        sound_stretched = sound._spawn(sound.raw_data, overrides={'frame_rate': new_frame_rate})
        sound_stretched = sound_stretched.set_frame_rate(sample_rate)

        # NOTE(review): dividing by 32768 assumes 16-bit samples - confirm
        # that soundfile writes PCM_16 for this input.
        audio_stretched = np.array(sound_stretched.get_array_of_samples()).astype(np.float32) / 32768.0
        if sound_stretched.channels == 2:
            audio_stretched = audio_stretched.reshape((-1, 2)).mean(axis=1)

        return audio_stretched
    finally:
        # Remove the temp file even when writing, decoding or resampling fails.
        os.unlink(tmp_path)
|
| 94 |
+
|
| 95 |
+
# ==========================================
|
| 96 |
+
# API ENDPOINTS
|
| 97 |
+
# ==========================================
|
| 98 |
+
@app.get("/")
async def root():
    """Return a static description of the API: version, voices, endpoints, tips."""
    # NOTE(review): these figures are fixed text; they are not read from the
    # live semaphores, so they may not match values set via the admin endpoint.
    async_features = {
        "gpu_semaphore": "2 concurrent GPU tasks",
        "cpu_semaphore": "4 concurrent CPU tasks",
        "io_semaphore": "6 concurrent I/O tasks",
        "thread_pool": "6 worker threads",
    }

    endpoints = {
        "POST /tts": "Synthesize speech (standard)",
        "POST /fast-tts": "Fast TTS for external apps",
        "POST /bulk-tts": "Bulk processing (up to 50 requests)",
        "GET /voices": "List available voices",
        "GET /health": "Health check with async status",
        "GET /status": "Detailed async resource status",
        "POST /admin/update_settings": "Update async settings real-time",
        "POST /admin/clear_cache": "Clear reference cache",
        "GET /admin/settings": "Get current settings",
    }

    performance_tips = [
        "Use same voice for consecutive requests (cache benefit)",
        "Keep text under 200 characters for best speed",
        "Use return_base64=true to avoid file I/O",
        "Consider bulk-tts for batches of 10-50 requests",
    ]

    external_app_recommendations = {
        "single_requests": "Use POST /fast-tts with return_base64=true",
        "batch_requests": "Use POST /bulk-tts for up to 50 requests",
        "performance_tips": performance_tips,
    }

    return {
        "message": "VieNeu-TTS API Server with Async Processing",
        "version": "1.0.0",
        "available_voices": list(VOICE_SAMPLES.keys()),
        "async_features": async_features,
        "endpoints": endpoints,
        "external_app_recommendations": external_app_recommendations,
    }
|
| 132 |
+
|
| 133 |
+
@app.get("/voices")
async def get_voices():
    """Return the configured voice names together with their count."""
    names = list(VOICE_SAMPLES)
    return {"voices": names, "total": len(names)}
|
| 139 |
+
|
| 140 |
+
@app.get("/health")
async def health_check():
    """Report model state, device, cache size and free async capacity.

    Returns a dict with ``status``, ``model_status`` ("loaded" or
    "not_loaded"), ``device``, ``cache_size``, ``async_resources`` (empty
    until all three semaphores exist), ``timestamp`` and, when running on
    CUDA, the allocated/reserved memory of device 0 in GB.
    """
    model_status = "loaded" if tts is not None else "not_loaded"

    # Check CUDA memory if using GPU
    cuda_info = {}
    if device == "cuda" and torch.cuda.is_available():
        cuda_info = {
            "cuda_memory_allocated": f"{torch.cuda.memory_allocated(0) / 1024**3:.2f} GB",
            "cuda_memory_reserved": f"{torch.cuda.memory_reserved(0) / 1024**3:.2f} GB"
        }

    # Async resource status
    async_status = {}
    if gpu_semaphore and cpu_semaphore and io_semaphore:
        # Report a thread *count*, as /status does.  The previous code returned
        # the executor's private thread collection itself, which is not a
        # sensible JSON value.
        active_threads = len(getattr(thread_pool, "_threads", ())) if thread_pool else 0
        async_status = {
            "gpu_available": gpu_semaphore._value,
            "cpu_available": cpu_semaphore._value,
            "io_available": io_semaphore._value,
            "thread_pool_active": active_threads
        }

    return {
        "status": "healthy",
        "model_status": model_status,
        "device": device,
        "cache_size": len(reference_cache),
        "async_resources": async_status,
        "timestamp": datetime.now().isoformat(),
        **cuda_info
    }
|
| 171 |
+
|
| 172 |
+
@app.get("/status")
async def get_status():
    """Return detailed server status: semaphores, thread pool, model and cache."""

    def describe(semaphore, capacity):
        # One entry of the "async_processing" section.
        free = semaphore._value
        return {
            "available": free,
            "max_capacity": capacity,
            "in_use": capacity - free,
        }

    # Semaphore status
    semaphore_status = {}
    if gpu_semaphore and cpu_semaphore and io_semaphore:
        # NOTE(review): capacities are fixed at 2 / 4 / 6 here, while
        # /admin/update_settings can rebuild the semaphores with other sizes;
        # "in_use" is then off - confirm whether that is intended.
        semaphore_status = {
            "gpu_semaphore": describe(gpu_semaphore, 2),
            "cpu_semaphore": describe(cpu_semaphore, 4),
            "io_semaphore": describe(io_semaphore, 6),
        }

    # Thread pool status
    thread_status = {}
    if thread_pool:
        if hasattr(thread_pool, '_threads'):
            active_threads = len(thread_pool._threads)
        else:
            active_threads = 0
        thread_status = {
            "max_workers": thread_pool._max_workers,
            "active_threads": active_threads,
        }

    # Model and cache info
    model_info = {
        "model_loaded": tts is not None,
        "device": device,
        "reference_cache_size": len(reference_cache),
        "cached_voices": list(reference_cache.keys()),
    }

    return {
        "server_status": "running",
        "async_processing": semaphore_status,
        "thread_pool": thread_status,
        "model_info": model_info,
        "available_voices": list(VOICE_SAMPLES.keys()),
        "timestamp": datetime.now().isoformat(),
    }
|
| 221 |
+
|
| 222 |
+
async def _load_reference_text(ref_text_path: str) -> str:
|
| 223 |
+
"""Load reference text with I/O semaphore"""
|
| 224 |
+
async with io_semaphore:
|
| 225 |
+
loop = asyncio.get_event_loop()
|
| 226 |
+
with open(ref_text_path, "r", encoding="utf-8") as f:
|
| 227 |
+
return await loop.run_in_executor(thread_pool, f.read)
|
| 228 |
+
|
| 229 |
+
async def _encode_reference_async(ref_audio_path: str, cache_key: str, request_id: str = "") -> torch.Tensor:
    """Encode a reference audio file in the worker pool and cache the result.

    Runs under the GPU semaphore.  The cache is checked again once the
    semaphore is held, so requests that queued up for the same uncached
    voice reuse the first encoding instead of repeating it.

    Args:
        ref_audio_path: Path of the reference audio passed to
            ``tts.encode_reference``.
        cache_key: Key under which the codes are stored in ``reference_cache``.
        request_id: Label used in the console messages.

    Returns:
        The reference codes (moved to CPU when they offer a ``cpu`` method).

    Raises:
        Exception: Whatever ``tts.encode_reference`` raises, after logging it.
    """
    async with gpu_semaphore:
        # Another request may have filled the cache while this one waited.
        if cache_key in reference_cache:
            print(f" ✨ [{request_id}] Using cached reference for {cache_key}")
            return reference_cache[cache_key]

        print(f" 🔄 [{request_id}] Encoding reference for {cache_key}...")
        loop = asyncio.get_running_loop()

        def encode_sync():
            try:
                # Clear CUDA cache before encoding
                if device == "cuda":
                    torch.cuda.empty_cache()

                ref_codes = tts.encode_reference(ref_audio_path)

                # Ensure ref_codes is on CPU for caching
                if hasattr(ref_codes, 'cpu'):
                    ref_codes = ref_codes.cpu()

                return ref_codes
            except Exception as e:
                print(f" ❌ [{request_id}] Failed to encode reference: {e}")
                raise  # re-raise with the original traceback

        ref_codes = await loop.run_in_executor(thread_pool, encode_sync)
        reference_cache[cache_key] = ref_codes
        print(f" ✅ [{request_id}] Reference encoded and cached")
        return ref_codes
|
| 256 |
+
|
| 257 |
+
async def _generate_speech_async(text: str, ref_codes: torch.Tensor, ref_text_raw: str, request_id: str = "") -> np.ndarray:
    """Run ``tts.infer`` in the worker pool while holding the GPU semaphore.

    Failures are printed with the request id and then re-raised.
    """
    def run_inference():
        try:
            # Clear CUDA cache before inference
            if device == "cuda":
                torch.cuda.empty_cache()
            return tts.infer(text, ref_codes, ref_text_raw)
        except Exception as e:
            print(f" ❌ [{request_id}] Failed to generate speech: {e}")
            raise e

    async with gpu_semaphore:
        print(f" 🎵 [{request_id}] Generating speech...")
        event_loop = asyncio.get_event_loop()
        generated = await event_loop.run_in_executor(thread_pool, run_inference)
        print(f" ✅ [{request_id}] Speech generated")
        return generated
|
| 278 |
+
|
| 279 |
+
async def _apply_speed_control_async(audio: np.ndarray, speed_factor: float, request_id: str = "") -> np.ndarray:
|
| 280 |
+
"""Apply speed control with CPU semaphore"""
|
| 281 |
+
if speed_factor == 1.0:
|
| 282 |
+
return audio
|
| 283 |
+
|
| 284 |
+
async with cpu_semaphore:
|
| 285 |
+
print(f" 🎚️ [{request_id}] Applying speed control: {speed_factor}x")
|
| 286 |
+
loop = asyncio.get_event_loop()
|
| 287 |
+
|
| 288 |
+
def speed_control_sync():
|
| 289 |
+
return apply_speed_control(audio, speed_factor)
|
| 290 |
+
|
| 291 |
+
return await loop.run_in_executor(thread_pool, speed_control_sync)
|
| 292 |
+
|
| 293 |
+
async def _save_audio_async(wav: np.ndarray, output_path: str) -> None:
    """Write ``wav`` to ``output_path`` with ``sf.write`` (rate 24000) in the worker pool.

    The write is performed while holding the I/O semaphore.
    """
    async with io_semaphore:
        event_loop = asyncio.get_event_loop()
        await event_loop.run_in_executor(thread_pool, sf.write, output_path, wav, 24000)
|
| 302 |
+
|
| 303 |
+
@app.post("/tts", response_model=TTSResponse)
async def synthesize_speech(request: TTSRequest):
    """Synthesize speech with speed control, using the async helpers.

    Steps: validate the request, load the voice's reference text, encode the
    reference audio (or reuse the cached codes), run inference, apply the
    speed factor and save the result under ``./temp_audio``.

    Returns:
        A ``TTSResponse`` whose ``audio_url`` is ``/audio/<filename>``.

    Raises:
        HTTPException: 400 for invalid input (empty or over 500 characters,
            unknown voice, speed outside 0.5-2.0); 500 when a reference file
            is missing or any processing step fails.
    """
    import uuid  # local import: only needed to build unique file names

    start_time = time.time()
    # Assigned before the try block so the generic error handler below can
    # always reference it.
    request_id = f"REQ-{int(time.time() * 1000) % 100000}"

    try:
        # Validate input
        if not request.text or not request.text.strip():
            raise HTTPException(status_code=400, detail="Text cannot be empty")

        if len(request.text) > 500:
            raise HTTPException(status_code=400, detail="Text too long (max 500 characters)")

        if request.voice_choice not in VOICE_SAMPLES:
            raise HTTPException(status_code=400, detail=f"Voice not found. Available: {list(VOICE_SAMPLES.keys())}")

        if not (0.5 <= request.speed_factor <= 2.0):
            raise HTTPException(status_code=400, detail="Speed factor must be between 0.5 and 2.0")

        print(f"🎤 [{request_id}] Processing: {request.text[:50]}...")
        print(f"🎚️ [{request_id}] Voice: {request.voice_choice}, Speed: {request.speed_factor}x")

        # Get reference audio and text paths
        voice_info = VOICE_SAMPLES[request.voice_choice]
        ref_audio_path = voice_info["audio"]
        ref_text_path = voice_info["text"]

        if not os.path.exists(ref_audio_path):
            raise HTTPException(status_code=500, detail=f"Reference audio not found: {ref_audio_path}")

        if not os.path.exists(ref_text_path):
            raise HTTPException(status_code=500, detail=f"Reference text not found: {ref_text_path}")

        # Load reference text (async I/O)
        ref_text_raw = await _load_reference_text(ref_text_path)

        # Encode reference (with cache) - async GPU
        cache_key = request.voice_choice
        if cache_key in reference_cache:
            print(f" ✨ [{request_id}] Using cached reference for {cache_key}")
            ref_codes = reference_cache[cache_key]
        else:
            ref_codes = await _encode_reference_async(ref_audio_path, cache_key, request_id)

        # Generate speech - async GPU
        wav = await _generate_speech_async(request.text, ref_codes, ref_text_raw, request_id)

        # Apply speed control - async CPU
        wav = await _apply_speed_control_async(wav, request.speed_factor, request_id)

        # Prepare output path.  The random suffix keeps two requests that
        # finish within the same millisecond from overwriting each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]  # milliseconds
        filename = f"tts_{request.speed_factor}x_{timestamp}_{uuid.uuid4().hex[:8]}.wav"

        # Create temp directory if not exists
        temp_dir = "./temp_audio"
        os.makedirs(temp_dir, exist_ok=True)

        output_path = os.path.join(temp_dir, filename)

        # Save output - async I/O
        await _save_audio_async(wav, output_path)

        processing_time = time.time() - start_time

        print(f" ✅ [{request_id}] Success! Processing time: {processing_time:.2f}s")
        print(f" 📁 [{request_id}] Saved: {output_path}")

        # Return response
        return TTSResponse(
            audio_url=f"/audio/{filename}",
            status="success",
            processing_time=processing_time,
            voice_used=request.voice_choice,
            speed_applied=request.speed_factor
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f" ❌ [{request_id}] Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
| 386 |
+
|
| 387 |
+
@app.get("/audio/{filename}")
async def get_audio(filename: str):
    """Download a previously generated audio file from ``./temp_audio``.

    Args:
        filename: File name taken from the URL path (client-supplied).

    Raises:
        HTTPException: 404 when the name does not resolve to a regular file
            located directly inside the audio directory.
    """
    audio_dir = os.path.abspath("./temp_audio")
    file_path = os.path.abspath(os.path.join(audio_dir, filename))

    # The name comes from the client: refuse anything that resolves outside
    # the audio directory (e.g. "..", or "..\\name" on Windows) and anything
    # that is not a regular file (a directory would pass os.path.exists).
    inside_audio_dir = os.path.dirname(file_path) == audio_dir
    if not inside_audio_dir or not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(
        path=file_path,
        media_type="audio/wav",
        filename=os.path.basename(file_path)
    )
|
| 402 |
+
|
| 403 |
+
# ==========================================
|
| 404 |
+
# ADMIN ENDPOINTS
|
| 405 |
+
# ==========================================
|
| 406 |
+
class SettingsUpdate(BaseModel):
    # Request body for POST /admin/update_settings.
    # The accepted ranges quoted below are the ones enforced by
    # update_settings(); the model itself only supplies the defaults.
    gpu_semaphore: int = 2  # GPU semaphore size (accepted range 1-4)
    cpu_semaphore: int = 4  # CPU semaphore size (accepted range 2-16)
    io_semaphore: int = 6  # I/O semaphore size (accepted range 3-16)
    thread_pool: int = 6  # thread pool max_workers (accepted range 2-20)
|
| 411 |
+
|
| 412 |
+
@app.post("/admin/update_settings")
async def update_settings(settings: SettingsUpdate):
    """
    Update the async concurrency settings at runtime.

    Validates the four requested sizes, then rebinds the module-level
    semaphores and thread pool to newly created objects. A previously
    installed thread pool is shut down from a background daemon thread.

    Args:
        settings: Requested sizes for the GPU/CPU/I-O semaphores and the
            thread pool.

    Returns:
        A dict with "status", "message", the applied "new_settings" and an
        ISO-formatted "timestamp".

    Raises:
        HTTPException: 400 when a value is outside its allowed range,
            500 when applying the settings fails for any other reason.
    """
    global gpu_semaphore, cpu_semaphore, io_semaphore, thread_pool

    try:
        # (requested value, lowest allowed, highest allowed, error detail),
        # checked in this order so the first violation is the one reported.
        range_checks = (
            (settings.gpu_semaphore, 1, 4, "GPU semaphore must be between 1-4"),
            (settings.cpu_semaphore, 2, 16, "CPU semaphore must be between 2-16"),
            (settings.io_semaphore, 3, 16, "I/O semaphore must be between 3-16"),
            (settings.thread_pool, 2, 20, "Thread pool must be between 2-20"),
        )
        for requested, lowest, highest, detail in range_checks:
            if requested < lowest or requested > highest:
                raise HTTPException(status_code=400, detail=detail)

        # Install fresh semaphores with the new sizes.
        gpu_semaphore = asyncio.Semaphore(settings.gpu_semaphore)
        cpu_semaphore = asyncio.Semaphore(settings.cpu_semaphore)
        io_semaphore = asyncio.Semaphore(settings.io_semaphore)

        # Swap in the new pool first, then retire the old one (if any).
        retired_pool = thread_pool
        thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=settings.thread_pool)
        if retired_pool:
            # shutdown(wait=True) blocks until queued work finishes, so it
            # runs on its own daemon thread instead of in this handler.
            threading.Thread(
                target=retired_pool.shutdown,
                kwargs={"wait": True},
                daemon=True,
            ).start()

        print(f"🔄 Settings updated: GPU({settings.gpu_semaphore}) CPU({settings.cpu_semaphore}) I/O({settings.io_semaphore}) Threads({settings.thread_pool})")

        applied = {
            "gpu_semaphore": settings.gpu_semaphore,
            "cpu_semaphore": settings.cpu_semaphore,
            "io_semaphore": settings.io_semaphore,
            "thread_pool": settings.thread_pool
        }
        return {
            "status": "success",
            "message": "Settings updated successfully",
            "new_settings": applied,
            "timestamp": datetime.now().isoformat()
        }

    except HTTPException:
        # Validation errors pass through unchanged.
        raise
    except Exception as e:
        print(f"❌ Failed to update settings: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to update settings: {str(e)}")
|
| 463 |
+
|
| 464 |
+
@app.post("/admin/clear_cache")
async def clear_cache():
    """
    Clear the reference cache.

    Empties the module-level reference_cache and, when the server runs on
    CUDA, also asks torch to release its cached GPU memory.

    Returns:
        A dict with "status", a "message" stating how many references were
        removed, and an ISO-formatted "timestamp".

    Raises:
        HTTPException: 500 when clearing fails.
    """
    try:
        # Count before clearing so the number can be reported.
        removed = len(reference_cache)
        reference_cache.clear()

        # Clear CUDA cache if available
        on_cuda = device == "cuda" and torch.cuda.is_available()
        if on_cuda:
            torch.cuda.empty_cache()

        print(f"🧹 Cache cleared: {removed} references removed")

        return {
            "status": "success",
            "message": f"Cache cleared successfully. {removed} references removed.",
            "timestamp": datetime.now().isoformat()
        }

    except Exception as e:
        print(f"❌ Failed to clear cache: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to clear cache: {str(e)}")
|
| 490 |
+
|
| 491 |
+
@app.get("/admin/settings")
async def get_current_settings():
    """
    Return the current settings.

    Reports, for each semaphore, its current internal counter and a
    "max_capacity" figure, plus the thread pool's worker limit and number
    of threads.

    Returns:
        A dict with "status", "settings" and an ISO-formatted "timestamp".
    """
    def semaphore_entry(semaphore, capacity):
        # `_value` is the semaphore's private counter; 0 is reported when
        # the semaphore has not been created.
        return {
            "current": semaphore._value if semaphore else 0,
            "max_capacity": capacity
        }

    pool = thread_pool
    worker_limit = pool._max_workers if pool else 0
    thread_count = len(pool._threads) if pool and hasattr(pool, '_threads') else 0

    # NOTE(review): the max_capacity figures 2/4/6 are literals in this
    # function; nothing here reads the sizes actually installed, so they do
    # not change after /admin/update_settings applies other values.
    current_settings = {
        "gpu_semaphore": semaphore_entry(gpu_semaphore, 2),
        "cpu_semaphore": semaphore_entry(cpu_semaphore, 4),
        "io_semaphore": semaphore_entry(io_semaphore, 6),
        "thread_pool": {
            "max_workers": worker_limit,
            "active_threads": thread_count
        }
    }

    return {
        "status": "success",
        "settings": current_settings,
        "timestamp": datetime.now().isoformat()
    }
|
| 520 |
+
|
| 521 |
+
# ==========================================
|
| 522 |
+
# FAST TTS ENDPOINT FOR EXTERNAL APPS
|
| 523 |
+
# ==========================================
|
| 524 |
+
class FastTTSRequest(BaseModel):
    # Request body for POST /fast-tts (and each item of a /bulk-tts batch).
    text: str  # text to synthesize; fast_tts rejects empty text and text over 1000 characters
    voice_choice: str = "Tuyên (nam miền Bắc)"  # key into VOICE_SAMPLES; fast_tts falls back to this default for unknown voices
    speed_factor: float = 1.0  # fast_tts resets values outside 0.5-2.0 to 1.0
    return_base64: bool = False  # Option to return audio as base64
    skip_file_save: bool = False  # Option to skip saving file
|
| 530 |
+
|
| 531 |
+
@app.post("/fast-tts")
async def fast_tts(request: FastTTSRequest):
    """
    Fast TTS endpoint tuned for external apps that send many requests.

    - Lighter validation: an unknown voice or an out-of-range speed falls
      back to the defaults instead of failing the request.
    - Can return the audio as base64 instead of a file URL.
    - Can skip saving the file.

    Args:
        request: Text, voice, speed and output options. Note that
            voice_choice and speed_factor are overwritten in place when
            they fall back to the defaults.

    Returns:
        A dict whose shape depends on the options:
        return_base64 -> "audio_base64" plus metadata;
        skip_file_save -> metadata only (no audio payload);
        otherwise -> "audio_url" and "filename" of the saved WAV file.

    Raises:
        HTTPException: 400 for empty or over-long text, 500 for any other
            failure during synthesis.
    """
    import base64
    import io
    import uuid

    start_time = time.time()
    request_id = f"FAST-{int(time.time() * 1000) % 100000}"

    try:
        # Minimal validation
        if not request.text or not request.text.strip():
            raise HTTPException(status_code=400, detail="Text cannot be empty")

        if len(request.text) > 1000:  # Increased limit for external apps
            raise HTTPException(status_code=400, detail="Text too long (max 1000 characters)")

        if request.voice_choice not in VOICE_SAMPLES:
            # Auto fallback to default voice instead of error
            request.voice_choice = "Tuyên (nam miền Bắc)"

        if not (0.5 <= request.speed_factor <= 2.0):
            request.speed_factor = 1.0  # Auto fallback instead of error

        print(f"⚡ [{request_id}] Fast processing: {request.text[:30]}... | {request.voice_choice} | {request.speed_factor}x")

        # Get reference (with cache)
        voice_info = VOICE_SAMPLES[request.voice_choice]
        ref_audio_path = voice_info["audio"]
        ref_text_path = voice_info["text"]

        # Load reference text (async I/O)
        ref_text_raw = await _load_reference_text(ref_text_path)

        # Encode reference (with cache) - async GPU
        cache_key = request.voice_choice
        if cache_key in reference_cache:
            ref_codes = reference_cache[cache_key]
        else:
            ref_codes = await _encode_reference_async(ref_audio_path, cache_key, request_id)

        # Generate speech - async GPU
        wav = await _generate_speech_async(request.text, ref_codes, ref_text_raw, request_id)

        # Apply speed control - async CPU
        wav = await _apply_speed_control_async(wav, request.speed_factor, request_id)

        # Measured here, so it excludes the encoding/saving done below.
        processing_time = time.time() - start_time

        # Return options
        if request.return_base64:
            # Return audio as base64 (no file saving)
            audio_buffer = io.BytesIO()
            sf.write(audio_buffer, wav, 24000, format='WAV')
            audio_base64 = base64.b64encode(audio_buffer.getvalue()).decode('utf-8')

            print(f" ✅ [{request_id}] Fast success (base64): {processing_time:.2f}s")

            return {
                "audio_base64": audio_base64,
                "status": "success",
                "processing_time": processing_time,
                "voice_used": request.voice_choice,
                "speed_applied": request.speed_factor,
                "format": "wav",
                "sample_rate": 24000
            }

        if request.skip_file_save:
            # Return raw audio data info (for streaming)
            print(f" ✅ [{request_id}] Fast success (no save): {processing_time:.2f}s")

            return {
                "status": "success",
                "processing_time": processing_time,
                "voice_used": request.voice_choice,
                "speed_applied": request.speed_factor,
                "audio_length": len(wav),
                "sample_rate": 24000,
                "message": "Audio generated but not saved"
            }

        # Standard file saving.
        # The timestamp only has millisecond resolution, so two concurrent
        # requests with the same speed could produce the same name and
        # overwrite each other; a random suffix keeps every name unique.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
        filename = f"fast_{request.speed_factor}x_{timestamp}_{uuid.uuid4().hex[:8]}.wav"

        temp_dir = "./temp_audio"
        os.makedirs(temp_dir, exist_ok=True)

        output_path = os.path.join(temp_dir, filename)
        await _save_audio_async(wav, output_path)

        print(f" ✅ [{request_id}] Fast success: {processing_time:.2f}s | {filename}")

        return {
            "audio_url": f"/audio/{filename}",
            "status": "success",
            "processing_time": processing_time,
            "voice_used": request.voice_choice,
            "speed_applied": request.speed_factor,
            "filename": filename
        }

    except HTTPException:
        # Validation errors pass through unchanged.
        raise
    except Exception as e:
        print(f" ❌ [{request_id}] Fast error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Fast TTS error: {str(e)}")
|
| 648 |
+
|
| 649 |
+
@app.post("/bulk-tts")
async def bulk_tts(requests: List[FastTTSRequest]):
    """
    Bulk TTS endpoint - processes several requests at once.

    Each item is run through fast_tts concurrently; a failure in one item
    is reported in its own result entry and does not fail the batch.

    Args:
        requests: Up to 50 FastTTSRequest items.

    Returns:
        A dict with the batch id, counts of successful/failed items, the
        total and average time, and the per-item "results" (each carrying
        the item's "index").

    Raises:
        HTTPException: 400 when the batch holds more than 50 requests.
    """
    if len(requests) > 50:  # Limit batch size
        raise HTTPException(status_code=400, detail="Too many requests in batch (max 50)")

    start_time = time.time()
    batch_id = f"BULK-{int(time.time() * 1000) % 100000}"

    print(f"📦 [{batch_id}] Processing bulk: {len(requests)} requests")

    async def process_single(req, index):
        """Run one item through fast_tts and wrap the outcome with its index."""
        try:
            # fast_tts mutates its request on fallback, so give it a copy.
            result = await fast_tts(req.copy())
            return {"index": index, "status": "success", "result": result}
        except Exception as e:
            return {"index": index, "status": "error", "error": str(e)}

    # Process all requests concurrently
    results = await asyncio.gather(
        *(process_single(req, i) for i, req in enumerate(requests)),
        return_exceptions=True,
    )

    # Anything that escaped process_single comes back as an exception object.
    processed_results = [
        {"status": "exception", "error": str(result)} if isinstance(result, Exception) else result
        for result in results
    ]

    total_time = time.time() - start_time
    success_count = sum(1 for r in processed_results if r.get("status") == "success")

    print(f" ✅ [{batch_id}] Bulk completed: {success_count}/{len(requests)} success in {total_time:.2f}s")

    return {
        "batch_id": batch_id,
        "total_requests": len(requests),
        "successful": success_count,
        "failed": len(requests) - success_count,
        "total_time": total_time,
        # An empty batch used to divide by zero here (HTTP 500).
        "avg_time_per_request": total_time / len(requests) if requests else 0.0,
        "results": processed_results
    }
|
| 699 |
+
|
| 700 |
+
# ==========================================
|
| 701 |
+
# STARTUP EVENT
|
| 702 |
+
# ==========================================
|
| 703 |
+
@app.on_event("startup")
async def startup_event():
    """
    Initialise concurrency controls and load the TTS model at startup.

    Creates the GPU/CPU/I-O semaphores and the thread pool, selects the
    torch device, then builds the VieNeuTTS instance from the local
    backbone directory when it exists, or from the hub repository
    otherwise. A model-loading failure is logged and re-raised.
    """
    global tts, device, gpu_semaphore, cpu_semaphore, io_semaphore, thread_pool

    banner = "=" * 60
    print(banner)
    print("🎙️ VieNeu-TTS FastAPI Server (Async)")
    print(banner)

    # Setup async controls
    gpu_semaphore = asyncio.Semaphore(2)  # 2 GPU tasks (parallel inference)
    cpu_semaphore = asyncio.Semaphore(4)  # 4 CPU tasks (more speed processing)
    io_semaphore = asyncio.Semaphore(6)  # 6 I/O tasks (more file operations)
    thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=6)

    print("🔄 Async setup: GPU(2) | CPU(4) | I/O(6) | ThreadPool(6)")

    # Device
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"🖥️ Using device: {device}")

    # Prefer a backbone that is already on disk; the codec repository is the
    # same hub id in both cases.
    local_backbone = "./models/VieNeu-TTS"
    codec_repo = "neuphonic/neucodec"

    if os.path.exists(local_backbone):
        print("📦 Loading VieNeu-TTS model (hybrid: local backbone + online codec)...")
        backbone_repo = local_backbone
        print(" 🔧 Using local backbone (no internet for backbone)")
        print(" 🌐 Using online codec (small download)")
    else:
        print("📦 Loading VieNeu-TTS model from HuggingFace...")
        backbone_repo = "pnnbao-ump/VieNeu-TTS"
        print(" 🌐 Using online models (internet required)")
        print(" 💡 Run 'python download_models.py' to use local backbone")

    try:
        tts = VieNeuTTS(
            backbone_repo=backbone_repo,
            backbone_device=device,
            codec_repo=codec_repo,
            codec_device=device
        )
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        if not os.path.exists(local_backbone):
            print("💡 Try running: python download_models.py")
        raise
    else:
        print("✅ Model loaded successfully!")

    print("📦 Model: VieNeu-TTS-1000h")
    print("🎚️ Speed Control: Pydub")
    print(banner)
|
| 756 |
+
|
| 757 |
+
@app.on_event("shutdown")
async def shutdown_event():
    """
    Release resources when the server shuts down.

    Waits for the thread pool to finish, then empties the CUDA cache when
    the server was running on CUDA.
    """
    print("🔄 Shutting down server...")

    # Cleanup thread pool
    pool = thread_pool
    if pool:
        print(" 🧹 Shutting down thread pool...")
        pool.shutdown(wait=True)
        print(" ✅ Thread pool shutdown complete")

    # Clear CUDA cache
    using_cuda = device == "cuda" and torch.cuda.is_available()
    if using_cuda:
        torch.cuda.empty_cache()
        print(" 🧹 CUDA cache cleared")

    print("✅ Server shutdown complete")
|
| 775 |
+
|
| 776 |
+
# ==========================================
|
| 777 |
+
# STARTUP
|
| 778 |
+
# ==========================================
|
| 779 |
+
def start_gui():
    """
    Build and run the Tkinter control window for the server.

    Meant to be used as a thread target: it creates the Tk root, the
    ServerGUI controller and blocks in the Tk main loop. Any exception
    raised while building or running the window is printed, not raised.
    """
    import tkinter as tk
    from tkinter import ttk, messagebox
    from collections import deque
    import webbrowser
    import os

    class ServerGUI:
        """Control panel that talks to the local server over HTTP."""

        def __init__(self, root):
            self.root = root
            self.root.title("VieNeu-TTS Server Control")
            self.root.geometry("600x500")

            # Recent log lines. update_display() rewrites the whole monitor
            # box, so the lines are kept here and re-rendered after every
            # refresh instead of being lost.
            self.log_lines = deque(maxlen=50)

            # Handle window close event
            self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

            # Add menu bar
            self.setup_menu()

            # Server info
            info_frame = ttk.LabelFrame(root, text="Server Information", padding="10")
            info_frame.pack(fill=tk.X, padx=10, pady=5)

            ttk.Label(info_frame, text="🎙️ VieNeu-TTS FastAPI Server", font=("Arial", 14, "bold")).pack()

            url_frame = ttk.Frame(info_frame)
            url_frame.pack()
            ttk.Label(url_frame, text="Server URL: ").pack(side=tk.LEFT)
            url_label = ttk.Label(url_frame, text="http://127.0.0.1:8000", foreground="blue", cursor="hand2")
            url_label.pack(side=tk.LEFT)
            url_label.bind("<Button-1>", lambda e: webbrowser.open("http://127.0.0.1:8000"))

            self.status_label = ttk.Label(info_frame, text="Status: ✅ Running", foreground="green")
            self.status_label.pack()

            # Quick actions
            actions_frame = ttk.LabelFrame(root, text="Quick Actions", padding="10")
            actions_frame.pack(fill=tk.X, padx=10, pady=5)

            btn_frame = ttk.Frame(actions_frame)
            btn_frame.pack()

            ttk.Button(btn_frame, text="Open API Docs", command=self.open_docs).pack(side=tk.LEFT, padx=5)
            ttk.Button(btn_frame, text="Test Server", command=self.test_server).pack(side=tk.LEFT, padx=5)
            ttk.Button(btn_frame, text="Clear Cache", command=self.clear_cache).pack(side=tk.LEFT, padx=5)

            # Shutdown button
            shutdown_btn = ttk.Button(btn_frame, text="Tắt Server", command=self.shutdown_server)
            shutdown_btn.pack(side=tk.RIGHT, padx=5)
            shutdown_btn.configure(style="Accent.TButton")  # Make it stand out

            # Settings control
            settings_frame = ttk.LabelFrame(root, text="Async Settings Control", padding="10")
            settings_frame.pack(fill=tk.X, padx=10, pady=5)

            # Current settings display
            self.settings_text = tk.Text(settings_frame, height=4, width=60, state="disabled")
            self.settings_text.pack(pady=(0, 10))

            # Settings controls
            control_frame = ttk.Frame(settings_frame)
            control_frame.pack(fill=tk.X)

            # GPU Semaphore
            gpu_frame = ttk.Frame(control_frame)
            gpu_frame.pack(fill=tk.X, pady=2)
            ttk.Label(gpu_frame, text="GPU Semaphore (1-4):").pack(side=tk.LEFT)
            self.gpu_var = tk.StringVar(value="2")
            ttk.Spinbox(gpu_frame, from_=1, to=4, width=10, textvariable=self.gpu_var).pack(side=tk.RIGHT)

            # CPU Semaphore
            cpu_frame = ttk.Frame(control_frame)
            cpu_frame.pack(fill=tk.X, pady=2)
            ttk.Label(cpu_frame, text="CPU Semaphore (2-16):").pack(side=tk.LEFT)
            self.cpu_var = tk.StringVar(value="4")
            ttk.Spinbox(cpu_frame, from_=2, to=16, width=10, textvariable=self.cpu_var).pack(side=tk.RIGHT)

            # I/O Semaphore
            io_frame = ttk.Frame(control_frame)
            io_frame.pack(fill=tk.X, pady=2)
            ttk.Label(io_frame, text="I/O Semaphore (3-16):").pack(side=tk.LEFT)
            self.io_var = tk.StringVar(value="6")
            ttk.Spinbox(io_frame, from_=3, to=16, width=10, textvariable=self.io_var).pack(side=tk.RIGHT)

            # Thread Pool
            thread_frame = ttk.Frame(control_frame)
            thread_frame.pack(fill=tk.X, pady=2)
            ttk.Label(thread_frame, text="Thread Pool (2-20):").pack(side=tk.LEFT)
            self.thread_var = tk.StringVar(value="6")
            ttk.Spinbox(thread_frame, from_=2, to=20, width=10, textvariable=self.thread_var).pack(side=tk.RIGHT)

            # Apply button
            ttk.Button(control_frame, text="Apply Settings", command=self.apply_settings).pack(pady=10)

            # Presets
            presets_frame = ttk.LabelFrame(settings_frame, text="Performance Presets")
            presets_frame.pack(fill=tk.X, pady=(10, 0))

            preset_grid = ttk.Frame(presets_frame)
            preset_grid.pack(pady=5)

            ttk.Button(preset_grid, text="Light (1,2,4,4)", command=lambda: self.apply_preset(1,2,4,4), width=15).grid(row=0, column=0, padx=2, pady=2)
            ttk.Button(preset_grid, text="Balanced (2,4,6,6)", command=lambda: self.apply_preset(2,4,6,6), width=15).grid(row=0, column=1, padx=2, pady=2)
            ttk.Button(preset_grid, text="Performance (3,8,10,10)", command=lambda: self.apply_preset(3,8,10,10), width=15).grid(row=1, column=0, padx=2, pady=2)
            ttk.Button(preset_grid, text="Ultra (4,12,12,16)", command=lambda: self.apply_preset(4,12,12,16), width=15).grid(row=1, column=1, padx=2, pady=2)

            # Status monitor
            monitor_frame = ttk.LabelFrame(root, text="Resource Monitor", padding="10")
            monitor_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

            self.monitor_text = tk.Text(monitor_frame, height=8, state="disabled")
            scrollbar = ttk.Scrollbar(monitor_frame, orient="vertical", command=self.monitor_text.yview)
            self.monitor_text.configure(yscrollcommand=scrollbar.set)

            self.monitor_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

            # Start monitoring
            self.update_display()
            self.start_monitoring()

        def setup_menu(self):
            """Setup menu bar"""
            menubar = tk.Menu(self.root)
            self.root.config(menu=menubar)

            # File menu
            file_menu = tk.Menu(menubar, tearoff=0)
            menubar.add_cascade(label="File", menu=file_menu)
            file_menu.add_command(label="Ẩn giao diện", command=lambda: self.root.withdraw())
            file_menu.add_separator()
            file_menu.add_command(label="Tắt server", command=self.shutdown_server)

            # View menu
            view_menu = tk.Menu(menubar, tearoff=0)
            menubar.add_cascade(label="View", menu=view_menu)
            view_menu.add_command(label="Refresh", command=self.update_display)
            view_menu.add_command(label="Open API Docs", command=self.open_docs)

            # Help menu
            help_menu = tk.Menu(menubar, tearoff=0)
            menubar.add_cascade(label="Help", menu=help_menu)
            help_menu.add_command(label="About", command=self.show_about)

        def show_about(self):
            """Show about dialog"""
            about_text = (
                "VieNeu-TTS Server Control\n"
                "\n"
                "Version: 1.0.0\n"
                "Server: FastAPI with Async Processing\n"
                "Model: VieNeu-TTS-1000h\n"
                "\n"
                "Features:\n"
                "• Real-time settings adjustment\n"
                "• Performance monitoring\n"
                "• Cache management\n"
                "• Multiple presets\n"
                "\n"
                "Server URL: http://127.0.0.1:8000"
            )

            messagebox.showinfo("About", about_text)

        def on_closing(self):
            """Handle window closing event"""
            result = messagebox.askyesnocancel(
                "Tắt Server",
                "Đóng giao diện sẽ tắt server.\n\n" +
                "• Yes: Tắt server và giao diện\n" +
                "• No: Chỉ ẩn giao diện (server vẫn chạy)\n" +
                "• Cancel: Không làm gì"
            )

            if result is True:  # Yes - Shutdown server
                self.log_message("🔄 Đang tắt server...")
                self.status_label.config(text="Status: 🔄 Shutting down...", foreground="orange")
                self.root.destroy()

                # Force exit the entire application
                import threading

                def force_exit():
                    import time
                    time.sleep(1)  # Give time for cleanup
                    os._exit(0)  # Force exit

                threading.Thread(target=force_exit, daemon=True).start()

            elif result is False:  # No - Just hide GUI
                self.log_message("ℹ️ Giao diện đã ẩn. Server vẫn chạy tại http://127.0.0.1:8000")
                self.root.withdraw()  # Hide window instead of destroying

                # Add system tray notification (if possible)
                try:
                    import subprocess
                    subprocess.run([
                        'powershell', '-Command',
                        f'Add-Type -AssemblyName System.Windows.Forms; ' +
                        f'[System.Windows.Forms.MessageBox]::Show("Server vẫn chạy tại http://127.0.0.1:8000", "VieNeu-TTS", "OK", "Information")'
                    ], capture_output=True)
                except Exception:
                    # Best effort only: powershell may be unavailable.
                    pass
            # If Cancel (None), do nothing

        def open_docs(self):
            """Open the server's /docs page in the default browser."""
            webbrowser.open("http://127.0.0.1:8000/docs")

        def test_server(self):
            """Send one sample request to /tts and log the outcome."""
            import requests
            try:
                test_data = {
                    "text": "Test từ giao diện server",
                    "voice_choice": "Tuyên (nam miền Bắc)",
                    "speed_factor": 1.0
                }
                response = requests.post("http://127.0.0.1:8000/tts", json=test_data, timeout=30)
                if response.status_code == 200:
                    result = response.json()
                    self.log_message(f"✅ Test thành công! Thời gian: {result.get('processing_time', 0):.2f}s")
                else:
                    self.log_message(f"❌ Test thất bại: HTTP {response.status_code}")
            except Exception as e:
                self.log_message(f"❌ Lỗi test: {str(e)}")

        def clear_cache(self):
            """Ask the server to clear its reference cache and log the outcome."""
            import requests
            try:
                response = requests.post("http://127.0.0.1:8000/admin/clear_cache", timeout=5)
                if response.status_code == 200:
                    self.log_message("✅ Cache đã được xóa")
                else:
                    self.log_message("❌ Không thể xóa cache")
            except Exception as e:
                self.log_message(f"❌ Lỗi xóa cache: {str(e)}")

        def shutdown_server(self):
            """Confirm with the user, then close the window and exit the process."""
            if messagebox.askokcancel("Tắt Server", "Bạn có chắc chắn muốn tắt server?"):
                self.log_message("🔄 Đang tắt server...")
                self.status_label.config(text="Status: 🔄 Shutting down...", foreground="orange")

                # Close the GUI and exit
                self.root.after(1000, lambda: [self.root.destroy(), os._exit(0)])

        def apply_preset(self, gpu, cpu, io, threads):
            """Load a preset into the four spinboxes and apply it."""
            preset_names = {
                (1,2,4,4): "Light",
                (2,4,6,6): "Balanced",
                (3,8,10,10): "Performance",
                (4,12,12,16): "Ultra"
            }
            preset_name = preset_names.get((gpu, cpu, io, threads), "Custom")

            self.log_message(f"🎯 Áp dụng preset {preset_name}...")
            self.gpu_var.set(str(gpu))
            self.cpu_var.set(str(cpu))
            self.io_var.set(str(io))
            self.thread_var.set(str(threads))
            self.apply_settings()

        def apply_settings(self):
            """Post the spinbox values to /admin/update_settings and log the outcome."""
            import requests
            try:
                settings = {
                    "gpu_semaphore": int(self.gpu_var.get()),
                    "cpu_semaphore": int(self.cpu_var.get()),
                    "io_semaphore": int(self.io_var.get()),
                    "thread_pool": int(self.thread_var.get())
                }

                self.log_message(f"🔄 Đang áp dụng cài đặt...")
                response = requests.post("http://127.0.0.1:8000/admin/update_settings", json=settings, timeout=5)
                if response.status_code == 200:
                    self.log_message(f"✅ Cài đặt đã áp dụng: GPU({settings['gpu_semaphore']}) CPU({settings['cpu_semaphore']}) I/O({settings['io_semaphore']}) Threads({settings['thread_pool']})")
                    # Update display after a short delay to see the changes
                    self.root.after(1000, self.update_display)
                else:
                    self.log_message(f"❌ Không thể áp dụng cài đặt: {response.text}")
            except Exception as e:
                self.log_message(f"❌ Lỗi áp dụng cài đặt: {str(e)}")

        def update_display(self):
            """Fetch /status and redraw the settings box and the monitor box."""
            import requests
            try:
                response = requests.get("http://127.0.0.1:8000/status", timeout=3)
                if response.status_code == 200:
                    data = response.json()

                    # Update settings display
                    settings_info = "=== CÀI ĐẶT HIỆN TẠI ===\n"
                    if 'async_processing' in data:
                        async_data = data['async_processing']
                        for resource, info in async_data.items():
                            available = info.get('available', 0)
                            max_cap = info.get('max_capacity', 0)
                            in_use = max(0, max_cap - available)  # Ensure non-negative
                            settings_info += f"{resource}: {max_cap} max, {in_use} đang dùng\n"

                    self.settings_text.config(state="normal")
                    self.settings_text.delete(1.0, tk.END)
                    self.settings_text.insert(1.0, settings_info)
                    self.settings_text.config(state="disabled")

                    # Update monitor
                    monitor_info = f"=== TRẠNG THÁI SERVER ===\n"
                    monitor_info += f"Cập nhật: {datetime.now().strftime('%H:%M:%S')}\n\n"

                    if 'async_processing' in data:
                        monitor_info += "📊 Tài nguyên Async:\n"
                        async_data = data['async_processing']
                        for resource, info in async_data.items():
                            available = info.get('available', 0)
                            max_cap = info.get('max_capacity', 1)
                            in_use = max(0, max_cap - available)
                            usage_pct = (in_use / max_cap) * 100 if max_cap > 0 else 0

                            # Visual progress bar
                            bar_length = 10
                            filled = int((usage_pct / 100) * bar_length)
                            bar = "█" * filled + "░" * (bar_length - filled)

                            monitor_info += f" {resource}: {bar} {usage_pct:.0f}% ({in_use}/{max_cap})\n"

                    if 'model_info' in data:
                        model_info = data['model_info']
                        monitor_info += f"\n🖥️ Model Info:\n"
                        monitor_info += f" Device: {model_info.get('device', 'unknown')}\n"
                        monitor_info += f" Cache: {model_info.get('reference_cache_size', 0)} giọng nói\n"

                        cached_voices = model_info.get('cached_voices', [])
                        if cached_voices:
                            monitor_info += f" Cached: {', '.join(cached_voices[:3])}"
                            if len(cached_voices) > 3:
                                monitor_info += f" (+{len(cached_voices)-3} khác)"
                            monitor_info += "\n"

                    # Re-append the retained log lines: the delete below would
                    # otherwise wipe every message within one refresh cycle.
                    # The snapshot is taken in one step because this method
                    # also runs on the monitoring thread.
                    retained_logs = "".join(tuple(self.log_lines))
                    if retained_logs:
                        monitor_info += "\n" + retained_logs

                    self.monitor_text.config(state="normal")
                    self.monitor_text.delete(1.0, tk.END)
                    self.monitor_text.insert(1.0, monitor_info)
                    self.monitor_text.config(state="disabled")

            except Exception:
                pass  # Ignore errors during startup

        def log_message(self, message):
            """Append a timestamped line to the monitor box and remember it."""
            timestamp = datetime.now().strftime("%H:%M:%S")
            log_msg = f"[{timestamp}] {message}\n"

            # Remembered so update_display() can restore it after a refresh.
            self.log_lines.append(log_msg)

            self.monitor_text.config(state="normal")
            self.monitor_text.insert(tk.END, log_msg)
            self.monitor_text.see(tk.END)
            self.monitor_text.config(state="disabled")

        def start_monitoring(self):
            """Refresh the display every 5 seconds from a daemon thread."""
            def monitor_loop():
                while True:
                    try:
                        self.update_display()
                    except Exception:
                        # Keep the loop alive, e.g. once the window is gone.
                        pass
                    time.sleep(5)

            import threading
            threading.Thread(target=monitor_loop, daemon=True).start()

    try:
        root = tk.Tk()
        gui = ServerGUI(root)
        root.mainloop()
    except Exception as e:
        print(f"GUI Error: {e}")
|
| 1149 |
+
|
| 1150 |
+
if __name__ == "__main__":
    import threading

    import uvicorn

    # The control window runs on a daemon thread so it never keeps the
    # process alive by itself.
    gui_thread = threading.Thread(target=start_gui, daemon=True)
    gui_thread.start()

    # Serve the API; this call blocks until the server stops.
    uvicorn.run("tts_server:app", host="127.0.0.1", port=8000, reload=False, log_level="info")
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|