victorgg commited on
Commit
6f8d5c0
·
verified ·
1 Parent(s): ed7fa16

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .editorconfig +9 -0
  2. .eslintignore +4 -0
  3. .eslintrc.cjs +20 -0
  4. .gitattributes +35 -0
  5. .gitignore +6 -0
  6. .npmrc +5 -0
  7. .prettierignore +6 -0
  8. .prettierrc.yaml +4 -0
  9. .vscode/settings.json +11 -0
  10. Heygem.ai model community Licensing Agreement.pdf +3 -0
  11. Heygem.ai模型社区许可协议.pdf +3 -0
  12. LICENSE +32 -0
  13. README.md +356 -10
  14. README_zh.assets/2025-03-20_14-38-00.jpg +0 -0
  15. README_zh.assets/576746d5-5215-4973-b1ca-c8d7409a6403.png +0 -0
  16. README_zh.assets/61eb4c19-3e7a-4791-a266-de4209690cbd.png +0 -0
  17. README_zh.assets/9a10b7b2-1eea-48c1-b7af-34129fe04446.png +0 -0
  18. README_zh.assets/accept.png +3 -0
  19. README_zh.assets/cb10263a14cc826e22c2be4bcae01a89.jpg +3 -0
  20. README_zh.assets/coze-video.png +0 -0
  21. README_zh.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png +0 -0
  22. README_zh.assets/image-20250304114114272.png +3 -0
  23. README_zh.assets/nvidia.png +3 -0
  24. README_zh.assets/output.png +3 -0
  25. README_zh.assets/shortcut.png +0 -0
  26. README_zh.assets/updatewsl.png +0 -0
  27. README_zh.assets/wsl-list.png +0 -0
  28. README_zh.md +418 -0
  29. build/entitlements.mac.plist +12 -0
  30. build/icon.icns +0 -0
  31. build/icon.ico +3 -0
  32. build/icon.png +0 -0
  33. build/logo-outline.ico +3 -0
  34. deploy/docker-compose-linux.yml +60 -0
  35. deploy/docker-compose-lite.yml +26 -0
  36. deploy/docker-compose.yml +60 -0
  37. dev-app-update.yml +3 -0
  38. doc/常见问题.assets/2025-03-25_09-36-07.jpg +3 -0
  39. doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/3074702f4d2eefb2faf1df3e1cb9cf2.png +0 -0
  40. doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png +0 -0
  41. doc/常见问题.assets/image-20250308205954494.png +3 -0
  42. doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/image-20250308212642892.png +0 -0
  43. doc/常见问题.assets/image-20250308213957568.png +3 -0
  44. doc/常见问题.assets/image-20250308215812201.png +3 -0
  45. doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/image-20250311143803466.png +0 -0
  46. doc/常见问题.md +128 -0
  47. electron-builder.yml +64 -0
  48. electron.vite.config.mjs +19 -0
  49. jsconfig.json +8 -0
  50. package-lock.json +0 -0
.editorconfig ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ root = true
2
+
3
+ [*]
4
+ charset = utf-8
5
+ indent_style = space
6
+ indent_size = 2
7
+ end_of_line = lf
8
+ insert_final_newline = true
9
+ trim_trailing_whitespace = true
.eslintignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ node_modules
2
+ dist
3
+ out
4
+ .gitignore
.eslintrc.cjs ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* eslint-env node */
2
+ require('@rushstack/eslint-patch/modern-module-resolution')
3
+
4
+ module.exports = {
5
+ extends: [
6
+ 'eslint:recommended',
7
+ // 'plugin:vue/vue3-recommended',
8
+ '@electron-toolkit',
9
+ // '@vue/eslint-config-prettier'
10
+ ],
11
+ parserOptions: {
12
+ ecmaVersion: 2022, // or a later version
13
+ sourceType: 'module', // if you use ES modules
14
+ },
15
+ rules: {
16
+ 'no-unused-vars': 'off',
17
+ 'vue/require-default-prop': 'off',
18
+ 'vue/multi-word-component-names': 'off'
19
+ }
20
+ }
.gitattributes CHANGED
@@ -33,3 +33,38 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Heygem.ai[[:space:]]model[[:space:]]community[[:space:]]Licensing[[:space:]]Agreement.pdf filter=lfs diff=lfs merge=lfs -text
37
+ Heygem.ai模型社区许可协议.pdf filter=lfs diff=lfs merge=lfs -text
38
+ README_zh.assets/accept.png filter=lfs diff=lfs merge=lfs -text
39
+ README_zh.assets/cb10263a14cc826e22c2be4bcae01a89.jpg filter=lfs diff=lfs merge=lfs -text
40
+ README_zh.assets/image-20250304114114272.png filter=lfs diff=lfs merge=lfs -text
41
+ README_zh.assets/nvidia.png filter=lfs diff=lfs merge=lfs -text
42
+ README_zh.assets/output.png filter=lfs diff=lfs merge=lfs -text
43
+ build/icon.ico filter=lfs diff=lfs merge=lfs -text
44
+ build/logo-outline.ico filter=lfs diff=lfs merge=lfs -text
45
+ doc/常见问题.assets/2025-03-25_09-36-07.jpg filter=lfs diff=lfs merge=lfs -text
46
+ doc/常见问题.assets/image-20250308205954494.png filter=lfs diff=lfs merge=lfs -text
47
+ doc/常见问题.assets/image-20250308213957568.png filter=lfs diff=lfs merge=lfs -text
48
+ doc/常见问题.assets/image-20250308215812201.png filter=lfs diff=lfs merge=lfs -text
49
+ resources/ffmpeg/linux-amd64/ffmpeg filter=lfs diff=lfs merge=lfs -text
50
+ resources/ffmpeg/linux-amd64/ffprobe filter=lfs diff=lfs merge=lfs -text
51
+ resources/ffmpeg/linux-amd64/qt-faststart filter=lfs diff=lfs merge=lfs -text
52
+ resources/ffmpeg/win-amd64/bin/avcodec-58.dll filter=lfs diff=lfs merge=lfs -text
53
+ resources/ffmpeg/win-amd64/bin/avdevice-58.dll filter=lfs diff=lfs merge=lfs -text
54
+ resources/ffmpeg/win-amd64/bin/avfilter-7.dll filter=lfs diff=lfs merge=lfs -text
55
+ resources/ffmpeg/win-amd64/bin/avformat-58.dll filter=lfs diff=lfs merge=lfs -text
56
+ resources/ffmpeg/win-amd64/bin/avutil-56.dll filter=lfs diff=lfs merge=lfs -text
57
+ resources/ffmpeg/win-amd64/bin/ffmpeg.exe filter=lfs diff=lfs merge=lfs -text
58
+ resources/ffmpeg/win-amd64/bin/ffplay.exe filter=lfs diff=lfs merge=lfs -text
59
+ resources/ffmpeg/win-amd64/bin/ffprobe.exe filter=lfs diff=lfs merge=lfs -text
60
+ resources/ffmpeg/win-amd64/bin/postproc-55.dll filter=lfs diff=lfs merge=lfs -text
61
+ resources/ffmpeg/win-amd64/bin/swresample-3.dll filter=lfs diff=lfs merge=lfs -text
62
+ resources/ffmpeg/win-amd64/bin/swscale-5.dll filter=lfs diff=lfs merge=lfs -text
63
+ resources/ffmpeg/win-amd64/lib/avcodec.lib filter=lfs diff=lfs merge=lfs -text
64
+ resources/ffmpeg/win-amd64/lib/avformat.lib filter=lfs diff=lfs merge=lfs -text
65
+ resources/ffmpeg/win-amd64/lib/avutil.lib filter=lfs diff=lfs merge=lfs -text
66
+ resources/ffmpeg/win-amd64/lib/libavcodec.dll.a filter=lfs diff=lfs merge=lfs -text
67
+ resources/ffmpeg/win-amd64/lib/libavformat.dll.a filter=lfs diff=lfs merge=lfs -text
68
+ resources/ffmpeg/win-amd64/lib/libavutil.dll.a filter=lfs diff=lfs merge=lfs -text
69
+ resources/logo-outline.ico filter=lfs diff=lfs merge=lfs -text
70
+ resources/logo-solid.ico filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ node_modules
2
+ dist
3
+ out
4
+ .idea
5
+ .DS_Store
6
+ *.log*
.npmrc ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ electron_mirror=https://npmmirror.com/mirrors/electron/
2
+ electron_builder_binaries_mirror=https://npmmirror.com/mirrors/electron-builder-binaries/
3
+
4
+ better_sqlite3_binary_host=https://registry.npmmirror.com/-/binary/better-sqlite3
5
+ better_sqlite3_binary_host_mirror=https://registry.npmmirror.com/-/binary/better-sqlite3
.prettierignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ out
2
+ dist
3
+ pnpm-lock.yaml
4
+ LICENSE.md
5
+ tsconfig.json
6
+ tsconfig.*.json
.prettierrc.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ singleQuote: true
2
+ semi: false
3
+ printWidth: 100
4
+ trailingComma: none
.vscode/settings.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "i18n-ally.localesPaths": "src/renderer/src/i18n/config",
3
+ "i18n-ally.enabledParsers": ["js"],
4
+ "i18n-ally.enabledFrameworks": ["vue", "vue-sfc"],
5
+ "i18n-ally.keystyle": "nested",
6
+ "i18n-ally.displayLanguage": "zh",
7
+ "i18n-ally.sortKeys": false,
8
+ "i18n-ally.namespace": true,
9
+ "i18n-ally.translate.engines": ["deepl", "google"], // translation engines
10
+ "i18n-ally.extract.keygenStyle": "camelCase" // use camelCase when naming extracted translation keys
11
+ }
Heygem.ai model community Licensing Agreement.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca7615e75b26024f5c66d5e2f408b0582fefe4467797407c8cd2f36602f9481f
3
+ size 184307
Heygem.ai模型社区许可协议.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef437924a485a1c6ff8f112fdfec89c7b5fcfbc966b944fb85e3a4eea27cd2a
3
+ size 261879
LICENSE ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Silicon Intelligence COMMUNITY LICENSE AGREEMENT
2
+
3
+ “Agreement” means the terms and conditions for use, reproduction, distribution and modification of this product set forth herein.
4
+
5
+ “Documentation” means the specifications, manuals and documentation by Silicon Intelligence.
6
+
7
+ “Licensee” or “you” means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf.
8
+
9
+ “Silicon Intelligence Materials” means, collectively, Silicon Intelligence’s proprietary code and Documentation (and any portion thereof) made available under this Agreement.
10
+
11
+ By clicking “I Accept” below or by using or distributing any portion or element of the Silicon Intelligence Materials, you agree to be bound by this Agreement.
12
+
13
+ 1. License Rights and Redistribution.
14
+
15
+ a. Grant of Rights. You are granted a non-exclusive, worldwide, non-transferable and royalty-free limited license under Silicon Intelligence’s intellectual property or other rights owned by Silicon Intelligence embodied in the Silicon Intelligence Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the Silicon Intelligence Materials.
16
+ b. Redistribution and Use.
17
+ i. If you distribute or make available the Silicon Intelligence Materials (or any derivative works thereof), or a product or service that uses any of them, you shall (A) provide a copy of this Agreement with any such Silicon Intelligence Materials; and (B) prominently display “Built with Silicon Intelligence” on a related website, user interface, blogpost, about page, or product documentation. If you use the Silicon Intelligence Materials to create, train, fine tune, or otherwise improve an AI model, which is distributed or made available, you shall also include “Silicon Intelligence” at the beginning of any such AI model name.
18
+ ii. If you receive Silicon Intelligence Materials, or any derivative works thereof, from a Licensee as part of an integrated end user product, then Section 2 of this Agreement will not apply to you.
19
+ iii. You must retain in all copies of the Silicon Intelligence Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Silicon Intelligence is licensed under the Silicon Intelligence Community License, Copyright © Silicon Intelligence Platforms, Inc. All Rights Reserved.”
20
+ iv. Your use of the Silicon Intelligence Materials must comply with applicable laws and regulations (including trade compliance laws and regulations).
21
+
22
+ 2. Additional Commercial Terms. If, on the Silicon Intelligence duix.ai version release date, the monthly active users of the products or services made available by or for Licensee, or Licensee’s affiliates, is greater than 1 thousand monthly active users in the preceding calendar month, or if your product based on the Silicon Intelligence Materials has more than 1 thousand active users, you must request a license from Silicon Intelligence, which Silicon Intelligence may grant to you in its sole discretion, and you are not authorized to exercise any of the rights under this Agreement unless or until Silicon Intelligence otherwise expressly grants you such rights.
23
+
24
+ 3. Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE SILICON INTELLIGENCE MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, AND SILICON INTELLIGENCE DISCLAIMS ALL WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE SILICON INTELLIGENCE MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE SILICON INTELLIGENCE MATERIALS AND ANY OUTPUT AND RESULTS.
25
+
26
+ 4. Limitation of Liability. IN NO EVENT WILL SILICON INTELLIGENCE OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF SILICON INTELLIGENCE OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
27
+
28
+ 5. Intellectual Property.
29
+ a. No trademark licenses are granted under this Agreement, and in connection with the Silicon Intelligence Materials, neither Silicon Intelligence nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Silicon Intelligence Materials or as set forth in this Section 5(a). Silicon Intelligence hereby grants you a license to use “Silicon Intelligence” solely as required to comply with the last sentence of Section 1.b.i. You will comply with Silicon Intelligence’s brand guidelines . All goodwill arising out of your use of the Mark will inure to the benefit of Silicon Intelligence.
30
+ b. If you institute litigation or other proceedings against Silicon Intelligence or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Silicon Intelligence Materials or outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Silicon Intelligence from and against any claim by any third party arising out of or related to your use or distribution of the Silicon Intelligence Materials.
31
+
32
+ 6. Term and Termination. The term of this Agreement will commence upon your acceptance of this Agreement or access to the Silicon Intelligence Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Silicon Intelligence may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Silicon Intelligence Materials. Sections 3, 4 shall survive the termination of this Agreement.
README.md CHANGED
@@ -1,10 +1,356 @@
1
- ---
2
- title: Gemai
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Heygem - Open Source Alternative to Heygen [【切换中文】](./README_zh.md)
2
+
3
+ ## Announcement
4
+ The Heygem digital human cloning agent and plugins have been successfully launched on the Coze platform. No complex deployment is required; even novice users can get started easily and use them directly.
5
+
6
+ Click here to instantly access the Coze store experience👉[Silicon-based Intelligent Digital Human Cloning Agent](https://www.coze.cn/store/agent/7488696243959431206?bid=6ftfk9dtg0g12) | [Silicon-based Intelligent Digital Human Cloning Plugin](https://www.coze.cn/store/plugin/7488926246634782746)
7
+
8
+ Scan the code to watch the operation video
9
+
10
+ <img src="./README_zh.assets/coze-video.png" width="50%">
11
+
12
+ ## [New Ubuntu Version Notice]
13
+
14
+ **Ubuntu Version Officially Released**
15
+
16
+ 1. Adaptation and verification work for Ubuntu 22.04 Desktop version (kernel 6.8.0-52-generic) has been completed. Compatibility testing for other Linux versions has not yet been conducted.
17
+ 2. Added internationalization (English) for the client program interface.
18
+ 3. Fixed some known issues
19
+ - #304
20
+ - #292
21
+ 4. [Ubuntu22.04 Installation Documentation](https://github.com/GuijiAI/HeyGem.ai?tab=readme-ov-file#ubuntu-2204-installation)
22
+
23
+ ## Important Notice to Developer Partners
24
+
25
+ **Dear Heygem Open Source Community Members:**
26
+
27
+ We sincerely thank you for your enthusiastic attention and active participation in the Heygem digital human open source project! We have noticed that some developers face challenges during local deployment. To better meet the needs of different scenarios, we are now announcing two parallel service solutions:
28
+
29
+ | **Project** | **HeyGem Open Source Local Deployment** | **Digital Human/Clone Voice API Service** |
30
+ | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
31
+ | Usage | Open Source Local Deployment | Rapid Clone API Service |
32
+ | Recommended | Technical Users | Business Users |
33
+ | Technical Threshold | Developers with deep learning framework experience/pursuing deep customization/wishing to participate in community co-construction | Quick business integration/focus on upper-level application development/need enterprise-level SLA assurance for commercial scenarios |
34
+ | Hardware Requirements | Need to purchase GPU server | No need to purchase GPU server |
35
+ | Customization | Can modify and extend the code according to your needs, fully controlling the software's functions and behavior | Cannot directly modify the source code, can only extend functions through API-provided interfaces, less flexible than open source projects |
36
+ | Technical Support | Community Support | Dynamic expansion support + professional technical response team |
37
+ | Maintenance Cost | High maintenance cost | Simple maintenance |
38
+ | Lip Sync Effect | Usable effect | Stunning and higher definition effect |
39
+ | Commercial Authorization | Supports global free commercial use (enterprises with more than 100,000 users or annual revenue exceeding 10 million USD need to sign a commercial license agreement) | Commercial use allowed |
40
+ | Iteration Speed | Slow updates, bug fixes depend on the community | Latest models/algorithms are prioritized, fast problem resolution |
41
+
42
+ We always adhere to the open source spirit, and the launch of the API service aims to provide a more complete solution matrix for developers with different needs. No matter which method you choose, you can always obtain technical support documents through James@toolwiz.com. We look forward to working with you to promote the inclusive development of digital human technology!
43
+
44
+ **Silicon-based Intelligent Developer Team**
45
+
46
+ <a href="https://mp.weixin.qq.com/s/vKiBR85E7JyRkr6CxLCppA?mpshare=1&scene=1&srcid=0319sszkopZO6870sGsU0TFc&sharer_shareinfo=cac5ec3bfa62ed558552c7c022821613&sharer_shareinfo_first=cac5ec3bfa62ed558552c7c022821613&from=industrynews#rd" target="_blank">From scratch, hand-in-hand to teach you how to create your own HeyGem open source AI digital human!</a>
47
+
48
+ [**Rapid Clone API**](https://app.guiji.cn/platform) | [**API Documentation Center**](https://guiji.cn/digital-docs/introduce/)
49
+
50
+ [**Real-time Interaction SDK**](https://app.guiji.cn/platform) | [**SDK Documentation Center**](https://guiji.cn/duix-light-document/introduce/)
51
+
52
+ [**Local Real-time Interaction (realtime) duix.ai Open Source Address**](https://github.com/GuijiAI/duix.ai) |
53
+ [**Android Version**](https://github.com/GuijiAI/duix.ai/blob/main/duix-android/dh_aigc_android/README.md) |
54
+ [**iOS Version**](https://github.com/GuijiAI/duix.ai/blob/main/duix-ios/GJLocalDigitalDemo/GJLocalDigitalSDK.md)
55
+
56
+ <img src="./README_zh.assets/cb10263a14cc826e22c2be4bcae01a89.jpg" width="50%">
57
+
58
+ ## Open Source Co-Creation · Shared Glory
59
+
60
+ Since we open-sourced Heygem, global geeks have illuminated the digital avatar matrix in the code universe, with each commit reconstructing the future! But joy is better shared than enjoyed alone—now we invite all experts to join the "Open Source Co-Creation Plan," empowering everyone with AI creativity and propelling the Chinese AI fleet towards the stars!
61
+
62
+ 1. Co-Creation Content Direction
63
+
64
+ Share high-quality videos or articles on Heygem deployment tutorials, optimization guides, and practical cases (Bilibili, Douyin, Xiaohongshu, WeChat Official Accounts, Zhihu, etc.)
65
+
66
+ 2. Open Source Co-Creation Special Reward Pool (Real Cash Rewards!)
67
+
68
+ (1) Basic Rewards
69
+
70
+ Content receiving 20-100 likes will be awarded the [Heygem.ai Master Award] and a 20 RMB cash red envelope.
71
+
72
+ Content receiving 100+ likes will be awarded the [Heygem.ai God Award] and a 50 RMB cash red envelope.
73
+
74
+ (2) Special Achievements:
75
+
76
+ The monthly MVP will unlock the Open Source Hall of Fame digital badge (permanently on-chain).
77
+
78
+ 3. Participation Method
79
+
80
+ Send your work to our customer service representative. When sending the friend request, include the note "Name+999".
81
+
82
+ <img src="./README_zh.assets/2025-03-20_14-38-00.jpg" width="50%">
83
+
84
+
85
+ ## Outstanding Co-Creation Works Exhibition
86
+
87
+ [HeyGem Digital Human One-Click Start, 8G Video Memory Available, Model Size 10G, No Need for 100G Hard Disk Space, No Need for D Drive, Based on Docker Single Image, Silicon-Based Open Source](https://www.bilibili.com/video/BV1awQqYZEqB/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
88
+
89
+ [Ai Digital Human 16 - Local Deployment! The Most Popular Open Source Digital Human HeyGem Zero-Basis Hands-On Teaching Setup Tutorial, 20% Generation Stuck Solution, Full Simplified Process with Supporting Files - T8 ComfyUI Tutorial](https://www.bilibili.com/video/BV1ACQSYEErF/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
90
+
91
+ [Heygem Open Source Witnessed History! Cyber Worker Revolution!](https://www.bilibili.com/video/BV1R3QpYsEY6/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
92
+
93
+ [Digital Human Project Heygem Local Deployment Tutorial](https://www.bilibili.com/video/BV1eWQ6YgEcp/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
94
+
95
+ [So Tempting! From Paid to Open Source, AI Digital Humans Will Open a New Era](http://xhslink.com/a/rQPYqoDSRih8)
96
+
97
+ [Open Source Free Digital Humans Are Here, Unlimited Times, Fast Cloning](http://xhslink.com/a/tX3p5V5tajh8)
98
+
99
+ [AI Digital Humans Are Free! GitHub's Hot Project Can Run on Your Computer](http://xhslink.com/a/8UT1kQ7vxjh8)
100
+
101
+ [The Most Popular Free AI Digital Human, HeyGem V1.0.3, Latest Update, One-Click Integration Package! Super Strong Lip-Sync Effect, Speed Up, Supports Long Videos, Batch Generation, 8G Video Memory Available!](https://www.bilibili.com/video/BV1SkoCYpEwh/?share_source=copy_web&vd_source=c38dcdb72a68f2a4e0b3c0f4f9a5a03c)
102
+
103
+ [**HeyGem One-Click Package Windows Direct Run Without Docker Silicon-Based Open Source Digital Human**](https://www.bilibili.com/video/BV1ZgovYGE3u/)
104
+
105
+ ## Introduction
106
+
107
+ <img src="README_zh.assets/image-20250304114114272.png">
108
+
109
+ Heygem is a fully offline video synthesis tool designed for Windows systems that can precisely clone your appearance and voice, digitalizing your image. You can create videos by driving virtual avatars through text and voice. No internet connection is required, protecting your privacy while enjoying convenient and efficient digital experiences.
110
+
111
+ - Core Features
112
+ - Precise Appearance and Voice Cloning: Using advanced AI algorithms to capture human facial features with high precision, including facial features, contours, etc., to build realistic virtual models. It can also precisely clone voices, capturing and reproducing subtle characteristics of human voices, supporting various voice parameter settings to create highly similar cloning effects.
113
+ - Text and Voice-Driven Virtual Avatars: Understanding text content through natural language processing technology, converting text into natural and fluent speech to drive virtual avatars. Voice input can also be used directly, allowing virtual avatars to perform corresponding actions and facial expressions based on the rhythm and intonation of the voice, making the virtual avatar's performance more natural and vivid.
114
+ - Efficient Video Synthesis: Highly synchronizing digital human video images with sound, achieving natural and smooth lip-syncing, intelligently optimizing audio-video synchronization effects.
115
+ - Multi-language Support: Scripts support eight languages - English, Japanese, Korean, Chinese, French, German, Arabic, and Spanish.
116
+ - Key Advantages
117
+ - Fully Offline Operation: No internet connection required, effectively protecting user privacy, allowing users to create in a secure, independent environment, avoiding potential data leaks during network transmission.
118
+ - User-Friendly: Clean and intuitive interface, easy to use even for beginners with no technical background, quickly mastering the software's usage to start their digital human creation journey.
119
+ - Multiple Model Support: Supports importing multiple models and managing them through one-click startup packages, making it convenient for users to choose suitable models based on different creative needs and application scenarios.
120
+ - Technical Support
121
+ - Voice Cloning Technology: Using advanced technologies like artificial intelligence to generate similar or identical voices based on given voice samples, covering context, intonation, speed, and other aspects of speech.
122
+ - Automatic Speech Recognition: Technology that converts human speech vocabulary content into computer-readable input (text format), enabling computers to "understand" human speech.
123
+ - Computer Vision Technology: Used in video synthesis for visual processing, including facial recognition and lip movement analysis, ensuring virtual avatar lip movements match voice and text content.
124
+
125
+ ## Dependencies
126
+
127
+ 1. Nodejs 18
128
+ 2. Docker Images
129
+ - docker pull guiji2025/fun-asr
130
+ - docker pull guiji2025/fish-speech-ziming
131
+ - docker pull guiji2025/heygem.ai
132
+
133
+ ## Windows Installation
134
+
135
+ ### Prerequisites
136
+
137
+ 1. Must have D Drive: Mainly used for storing digital human and project data
138
+ - Free space requirement: More than 30GB
139
+ 2. C Drive: Used for storing service image files
140
+
141
+ - Free space requirement: More than 100GB
142
+ - If less than 100GB is available, after installing Docker, you can choose a different disk folder with more than 100GB of remaining space at the location shown below.
143
+
144
+ ![output](README_zh.assets/output.png)
145
+
146
+ 3. System Requirements:
147
+ - Currently supports Windows 10 19042.1526 or higher
148
+ 4. Recommended Configuration:
149
+ - CPU: 13th Gen Intel Core i5-13400F
150
+ - Memory: 32GB
151
+ - Graphics Card: RTX 4070
152
+ 5. Ensure you have an NVIDIA graphics card with properly installed drivers
153
+
154
+ NVIDIA driver download link: https://www.nvidia.cn/drivers/lookup/
155
+
156
+ ![nvidia](README_zh.assets/nvidia.png)
157
+
158
+ ### Installing Windows Docker
159
+
160
+ 1. Use the command `wsl --list --verbose` to check if WSL is installed. If it shows as below, it's already installed and no further installation is needed.
161
+
162
+ ![wsl-list](README_zh.assets/wsl-list.png)
163
+
164
+ > - WSL installation command: `wsl --install`
165
+ > - May fail due to network issues, try multiple times
166
+ > - During installation, you'll need to set and remember a new username and password
167
+
168
+ 2. Update WSL using `wsl --update`.
169
+
170
+ ![updatewsl](README_zh.assets/updatewsl.png)
171
+
172
+ 3. [Download Docker for Windows](https://www.docker.com/), choose the appropriate installation package based on your CPU architecture.
173
+
174
+ 4. When you see this interface, installation is successful.
175
+
176
+ ![61eb4c19-3e7a-4791-a266-de4209690cbd](README_zh.assets/61eb4c19-3e7a-4791-a266-de4209690cbd.png)
177
+
178
+ 5. Run Docker
179
+
180
+ ![shortcut](README_zh.assets/shortcut.png)
181
+
182
+ 6. Accept the agreement and skip login on first run
183
+
184
+ ![accept](README_zh.assets/accept.png)
185
+
186
+ ![576746d5-5215-4973-b1ca-c8d7409a6403](README_zh.assets/576746d5-5215-4973-b1ca-c8d7409a6403.png)
187
+
188
+ ![9a10b7b2-1eea-48c1-b7af-34129fe04446](README_zh.assets/9a10b7b2-1eea-48c1-b7af-34129fe04446.png)
189
+
190
+ ### Installing the Server
191
+
192
+ Install the server using Docker and docker-compose as follows:
193
+
194
+ 1. The `docker-compose.yml` file is in the `/deploy` directory.
195
+ 2. Execute `docker-compose up -d` in the `/deploy` directory, <u>if you want to use the lite version, execute `docker-compose -f docker-compose-lite.yml up -d`</u>
196
+ 3. Wait patiently (about half an hour, depending on network speed). The download uses about 70GB of data, so make sure to use Wi-Fi.
197
+ 4. When you see three services in Docker, it indicates success
198
+
199
+ ![e29d1922-7c58-46b4-b1e9-961f853f26d4](README_zh.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png)
200
+
201
+ ### Client
202
+
203
+ 1. Directly download the [officially built installation package](https://github.com/GuijiAI/HeyGem.ai/releases)
204
+ 2. Double-click `HeyGem-x.x.x-setup.exe` to install
205
+
206
+ ## Ubuntu 22.04 Installation
207
+
208
+ ### Recommended Configuration
209
+
210
+ - CPU: 13th Gen Intel Core i5-13400F
211
+ - Memory: 32GB or more (required)
212
+ - Graphics Card: RTX-4070 (ensure you have an NVIDIA graphics card and the driver is correctly installed)
213
+ - Hard Disk: More than 100GB of free space
214
+
215
+ ### Install Docker
216
+
217
+ > First, check if Docker is installed using `docker --version`. If it is installed, skip the following steps.
218
+
219
+ 1. Directly download the [officially built installation package](https://github.com/GuijiAI/HeyGem.ai/releases) for the Linux version
220
+ 2. Double-click `HeyGem-x.x.x.AppImage` to launch, no installation required
221
+
222
+ > Reminder: On Ubuntu systems, if you are using the `root` user to access the desktop, double-clicking `HeyGem-x.x.x.AppImage` may not work. You need to execute `./HeyGem-x.x.x.AppImage --no-sandbox` in the terminal, adding the `--no-sandbox` parameter.
223
+
224
+ ## Open APIs
225
+
226
+ We have opened APIs for model training and video synthesis. After Docker starts, several ports will be exposed locally, accessible through `http://127.0.0.1`.
227
+
228
+ For specific code, refer to:
229
+
230
+ - src/main/service/model.js
231
+ - src/main/service/video.js
232
+ - src/main/service/voice.js
233
+
234
+ ### Model Training
235
+
236
+ 1. Separate video into silent video + audio
237
+ 2. Place audio in `D:\heygem_data\voice\data`
238
+ > `D:\heygem_data\voice\data` is agreed with the `guiji2025/fish-speech-ziming` service, can be modified in docker-compose
239
+ 3. Call the `http://127.0.0.1:18180/v1/preprocess_and_tran` interface
240
+ > Parameter example:
241
+ >
242
+ > ```json
243
+ > {
244
+ > "format": ".wav",
245
+ > "reference_audio": "xxxxxx/xxxxx.wav",
246
+ > "lang": "zh"
247
+ > }
248
+ > ```
249
+ >
250
+ > Response example:
251
+ >
252
+ > ```json
253
+ > {
254
+ > "asr_format_audio_url": "xxxx/x/xxx/xxx.wav",
255
+ > "reference_audio_text": "xxxxxxxxxxxx"
256
+ > }
257
+ > ```
258
+ >
259
+ > **Record the response results as they will be needed for subsequent audio synthesis**
260
+
261
+ ### Audio Synthesis
262
+
263
+ Interface: `http://127.0.0.1:18180/v1/invoke`
264
+
265
+ ```json
266
+ // Request parameters
267
+ {
268
+ "speaker": "{uuid}", // A unique UUID
269
+ "text": "xxxxxxxxxx", // Text content to synthesize
270
+ "format": "wav", // Fixed parameter
271
+ "topP": 0.7, // Fixed parameter
272
+ "max_new_tokens": 1024, // Fixed parameter
273
+ "chunk_length": 100, // Fixed parameter
274
+ "repetition_penalty": 1.2, // Fixed parameter
275
+ "temperature": 0.7, // Fixed parameter
276
+ "need_asr": false, // Fixed parameter
277
+ "streaming": false, // Fixed parameter
278
+ "is_fixed_seed": 0, // Fixed parameter
279
+ "is_norm": 0, // Fixed parameter
280
+ "reference_audio": "{voice.asr_format_audio_url}", // Return value from previous "Model Training" step
281
+ "reference_text": "{voice.reference_audio_text}" // Return value from previous "Model Training" step
282
+ }
283
+ ```
284
+
285
+ ### Video Synthesis
286
+
287
+ - Synthesis interface: `http://127.0.0.1:8383/easy/submit`
288
+
289
+ ```json
290
+ // Request parameters
291
+ {
292
+ "audio_url": "{audioPath}", // Audio path
293
+ "video_url": "{videoPath}", // Video path
294
+ "code": "{uuid}", // Unique key
295
+ "chaofen": 0, // Fixed value
296
+ "watermark_switch": 0, // Fixed value
297
+ "pn": 1 // Fixed value
298
+ }
299
+ ```
300
+
301
+ - Progress query: `http://127.0.0.1:8383/easy/query?code=${taskCode}`
302
+ > GET request, the parameter `taskCode` is the `code` from the synthesis interface input above
303
+
304
+ ## Self-Check Steps Before Asking Questions
305
+
306
+ 1. Check if all three services are in Running status
307
+
308
+ ![e29d1922-7c58-46b4-b1e9-961f853f26d4](./doc/常见问题.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png)
309
+
310
+ 2. Confirm that your machine has an NVIDIA graphics card and drivers are correctly installed.
311
+
312
+ All computing power for this project is local. The three services won't start without an NVIDIA graphics card or proper drivers.
313
+
314
+ 3. Ensure both server and client are updated to the latest version. The project is newly open-sourced, the community is very active, and updates are frequent. Your issue might have been resolved in a new version.
315
+
316
+ - Server: Go to `/deploy` directory and re-execute `docker-compose up -d`
317
+ - Client: `pull` code and re-`build`
318
+
319
+ 4. [GitHub Issues](https://github.com/GuijiAI/HeyGem.ai/issues) are continuously updated, issues are being resolved and closed daily. Check frequently, your issue might already be resolved.
320
+
321
+ ## Question Template
322
+
323
+ 1. Problem Description
324
+
325
+ Describe the reproduction steps in detail, with screenshots if possible.
326
+
327
+ 2. Provide Error Logs
328
+
329
+ - How to get client logs:
330
+
331
+ ![image-20250308205954494](./doc/常见问题.assets/image-20250308205954494.png)
332
+
333
+ - Server logs:
334
+
335
+ Find the key location, or click on our three Docker services, and "Copy" as shown below.
336
+
337
+ ![image-20250308215812201](./doc/常见问题.assets/image-20250308215812201.png)
338
+
339
+ ## Contact Us
340
+
341
+ ```
342
+ James@toolwiz.com
343
+ ```
344
+
345
+ ## License
346
+
347
+ [LICENSE](./LICENSE)
348
+
349
+ ## Acknowledgments
350
+
351
+ - ASR based on [fun-asr](https://github.com/modelscope/FunASR)
352
+ - TTS based on [fish-speech-ziming](https://github.com/fishaudio/fish-speech)
353
+
354
+ ## Star History
355
+
356
+ [![Star History Chart](https://api.star-history.com/svg?repos=GuijiAI/HeyGem.ai&type=Date)](https://www.star-history.com/#GuijiAI/HeyGem.ai&Date)
README_zh.assets/2025-03-20_14-38-00.jpg ADDED
README_zh.assets/576746d5-5215-4973-b1ca-c8d7409a6403.png ADDED
README_zh.assets/61eb4c19-3e7a-4791-a266-de4209690cbd.png ADDED
README_zh.assets/9a10b7b2-1eea-48c1-b7af-34129fe04446.png ADDED
README_zh.assets/accept.png ADDED

Git LFS Details

  • SHA256: f4da684dbd32f6d6ef77c72aee620acecc537498dfd2e6ae4f8df84ead3fe28f
  • Pointer size: 131 Bytes
  • Size of remote file: 768 kB
README_zh.assets/cb10263a14cc826e22c2be4bcae01a89.jpg ADDED

Git LFS Details

  • SHA256: 9aea2d7456233f75769064783fea5906589e84654629ca6eed1c2a821add8b46
  • Pointer size: 131 Bytes
  • Size of remote file: 172 kB
README_zh.assets/coze-video.png ADDED
README_zh.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png ADDED
README_zh.assets/image-20250304114114272.png ADDED

Git LFS Details

  • SHA256: c1b757608510bab4764f1f0e0d3cdbabaf07420793efd804724c65f04a2e892b
  • Pointer size: 131 Bytes
  • Size of remote file: 186 kB
README_zh.assets/nvidia.png ADDED

Git LFS Details

  • SHA256: 4854c6601c793f17c443c30ca9a26439f256d734e582a67455d270369b1961f8
  • Pointer size: 131 Bytes
  • Size of remote file: 911 kB
README_zh.assets/output.png ADDED

Git LFS Details

  • SHA256: 1bf1a8548c091cf9c934b2350e4e2ee3158884f54bf1a6f1c2093554f875e664
  • Pointer size: 131 Bytes
  • Size of remote file: 113 kB
README_zh.assets/shortcut.png ADDED
README_zh.assets/updatewsl.png ADDED
README_zh.assets/wsl-list.png ADDED
README_zh.md ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Heygem - Heygen的开源平替产品 [【Switch to English】](./README.md)
2
+ ## 【公告】
3
+ Heygem 数字人克隆智能体和插件已成功上线至Coze 平台 ,无需复杂部署,即使是小白用户也能轻松上手直接使用。
4
+
5
+ 戳这里秒达Coze商店体验👉[硅基智能数字人克隆智能体](https://www.coze.cn/store/agent/7488696243959431206?bid=6ftfk9dtg0g12) | [硅基智能数字人克隆插件](https://www.coze.cn/store/plugin/7488926246634782746)
6
+
7
+ 扫码观看操作视频
8
+
9
+ <img src="./README_zh.assets/coze-video.png" width="50%">
10
+
11
+ ## 【新增Ubuntu版本通知】
12
+
13
+ **Ubuntu版本正式发布**
14
+
15
+ 1. 目前已完成 Ubuntu 22.04 Desktop 版本(内核 6.8.0-52-generic)的适配验证工作。其他 Linux 版本暂未进行兼容性测试。
16
+ 2. 补充客户端程序界面国际化(英文)。
17
+ 3. 修复一些已知问题
18
+ - #304
19
+ - #292
20
+ 4. [Ubuntu22.04 安装文档](https://github.com/GuijiAI/HeyGem.ai/blob/main/README_zh.md#ubuntu2204-%E5%AE%89%E8%A3%85)
21
+
22
+ ## 【致开发者伙伴的重要通知】
23
+
24
+ **亲爱的Heygem开源社区成员:**
25
+
26
+ 衷心感谢各位对Heygem数字人开源项目的热情关注与积极参与!我们注意到部分开发者在本地部署环节遇到挑战,为更好地满足不同场景需求,现同步告知两项并行服务方案:
27
+
28
+ | **项目** | **HeyGem开源本地部署** | **数字人/克隆音API接口服务** |
29
+ | -------- | ---------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
30
+ | 使用方式 | 开源本地部署 | 极速克隆API服务 |
31
+ | 推荐 | 技术型用户 | 业务型用户 |
32
+ | 技术门槛 | 具备深度学习框架经验/追求深度定制化/希望参与社区共建的开发者 | 快速业务集成/专注上层应用开发/需企业级SLA保障的商用场景 |
33
+ | 硬件要求 | 需要购买GPU服务器 | 无需购买GPU服务器 |
34
+ | 定制化 | 可以根据自己的需求对代码进行修改和扩展,完全掌控软件的功能和行为 | 无法直接修改源代码,只能通过API提供的接口进行功能扩展,灵活性不如开源项目 |
35
+ | 技术支持 | 社区支持 | 动态扩容支持 + 专业技术响应团队 |
36
+ | 维护成本 | 维护成本高 | 维护简单 |
37
+ | 口形效果 | 效果可用 | 效果惊艳且更高清 |
38
+ | 商用授权 | 支持全球免费商用(用户量超过10万或年营收达1000万美元以上的企业需签署商业许可协议) | 可商用 |
39
+ | 迭代速度 | 更新慢,Bug修复依赖社区 | 最新模型/算法优先适用,问题修复快 |
40
+
41
+ 我们始终秉持开源初心,API服务的推出旨在为不同需求的开发者提供更完整的解决方案矩阵。无论您选择哪种方式,都可随时通过James@toolwiz.com获取技术支持文档。期待与各位共同推动数字人技术的普惠发展!
42
+
43
+ **硅基智能开发者团队**
44
+
45
+ <a href="https://mp.weixin.qq.com/s/vKiBR85E7JyRkr6CxLCppA?mpshare=1&scene=1&srcid=0319sszkopZO6870sGsU0TFc&sharer_shareinfo=cac5ec3bfa62ed558552c7c022821613&sharer_shareinfo_first=cac5ec3bfa62ed558552c7c022821613&from=industrynews#rd" target="_blank">从零开始,手把手教你打造专属HeyGem 开源AI数字人!</a>
46
+
47
+ [**极速克隆API**](https://app.guiji.cn/platform) | [**API文档中心**](https://guiji.cn/digital-docs/introduce/)
48
+
49
+ [**实时交互SDK**](https://app.guiji.cn/platform) | [**SDK文档中心**](https://guiji.cn/duix-light-document/introduce/)
50
+
51
+ [**本地实时交互(realtime)duix.ai 开源地址**](https://github.com/GuijiAI/duix.ai) |
52
+ [**Android版本**](https://github.com/GuijiAI/duix.ai/blob/main/duix-android/dh_aigc_android/README.md) |
53
+ [**IOS版本**](https://github.com/GuijiAI/duix.ai/blob/main/duix-ios/GJLocalDigitalDemo/GJLocalDigitalSDK.md)
54
+
55
+ <img src="./README_zh.assets/cb10263a14cc826e22c2be4bcae01a89.jpg" width="50%">
56
+
57
+
58
+ ## 【开源共创·荣耀共享】
59
+
60
+ 自从我们开源了Heygem,全球极客已在代码宇宙中点亮数字分身矩阵,每个commit都在重构未来!但独乐乐不如众乐乐——现在诚邀各路大神加入「开源共创计划」,让AI创意赋能每个人,一起推动中国AI舰队驶向星辰大海!
61
+
62
+ 1. 共创内容方向
63
+
64
+ 分享Heygem部署教程、优化指南、实战案例等高质量视频或文章(B站、抖音、小红书、公众号、知乎等)
65
+
66
+ 2. 开源共创特供奖励池(真金白银奉上!)
67
+
68
+ (1)基础奖励
69
+
70
+ 内容获得 20-100 点赞,获评【Heygem.ai 大师奖】及 20 元现金大师🧧
71
+
72
+ 内容获得 100+ 点赞,获评【Heygem.ai 之神奖】及 50 元现金大神🧧
73
+
74
+ (2)特殊成就:
75
+
76
+ 月度MVP将解锁开源名人堂数字勋章(永久上链)
77
+
78
+ 3. 参与方式
79
+
80
+ 请将你的创意发送至客服小姐姐,加好友备注“姓名+999”
81
+
82
+ <img src="./README_zh.assets/2025-03-20_14-38-00.jpg" width="50%">
83
+
84
+
85
+ ## 共创优秀作品展
86
+
87
+ [HeyGem数字人一键启动,8G显存可用,模型体积10G,不需要100G硬盘空间,不需要d盘,基于Docker单镜像,硅基开源](https://www.bilibili.com/video/BV1awQqYZEqB/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
88
+
89
+ [Ai数字人16-本地部署!最火爆开源数字人HeyGem零基础手把手教学搭建教程,20%生成卡住解决方法,全套简化流程配套文件分享-T8 comfyui教程](https://www.bilibili.com/video/BV1ACQSYEErF/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
90
+
91
+ [heygem开源见证历史了!赛博打工人革命啊!](https://www.bilibili.com/video/BV1R3QpYsEY6/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
92
+
93
+ [数字人项目Heygem本地部署教程](https://www.bilibili.com/video/BV1eWQ6YgEcp/?spm_id_from=333.337.search-card.all.click&vd_source=618f44772c5dafb47317bb728505d79c)
94
+
95
+ [真香!从付费到开源,AI数字人将开启新时代](http://xhslink.com/a/rQPYqoDSRih8)
96
+
97
+ [开源免费的数字人来了,不限次数,快速克隆](http://xhslink.com/a/tX3p5V5tajh8)
98
+
99
+ [AI数字人免费啦!GitHub爆火项目电脑就能跑](http://xhslink.com/a/8UT1kQ7vxjh8)
100
+
101
+ [最火爆免费AI数字人,HeyGem V1.0.3,最新更新,一键整合包!口型效果超强,速度飞起,支持长视频、批量生成,8G显存可用!](https://www.bilibili.com/video/BV1SkoCYpEwh/?share_source=copy_web&vd_source=c38dcdb72a68f2a4e0b3c0f4f9a5a03c)
102
+
103
+ [【HeyGem】一键包 windows直接运行 无需docker 硅基开源数字人](https://www.bilibili.com/video/BV1ZgovYGE3u/)
104
+
105
+ ## 介绍
106
+
107
+ <img src="README_zh.assets/image-20250304114114272.png">
108
+
109
+ Heygem是一款专为Windows系统设计的全离线视频合成工具,它能够精确克隆您的外貌和声音,让您的形象数字化。您可以通过文字和语音驱动虚拟形象,进行视频制作。无需联网,保护隐私的同时,也能享受到便捷和高效的数字体验。
110
+
111
+ - 核心功能
112
+ - 精确外貌与声音克隆:运用先进的 AI 算法,高精度捕捉真人外貌特征,包括五官形状、面部轮廓等,构建逼真虚拟模型。同时,能精准克隆声音,捕捉并还原人声的细微特征,支持多种声音参数设置,可创造与原声高度相似的克隆效果。
113
+ - 文字和语音驱动虚拟形象:通过自然语言处理技术理解文本内容,将文字转换为自然流畅的语音,实现文字驱动虚拟形象。也可直接使用语音输入,让虚拟形象根据语音的节奏、语调等进行相应的动作和表情变化,使虚拟形象的表现更加自然、生动。
114
+ - 高效视频合成:将数字人的视频画面与声音高度同步,实现自然流畅的口型匹配,智能优化音视频同步效果。
115
+ - 多语言:脚本支持八种语言,英语、日语、韩语、中文、法语、德语、阿拉伯语和西班牙语。
116
+ - 显著优势
117
+ - 全离线操作:无需联网即可使用,有效保护用户隐私,让用户在安全、独立的环境中进行创作,避免数据在网络传输过程中可能存在的泄露风险。
118
+ - 简单易用:操作界面简洁直观,即使是没有任何技术背景的小白也能轻松上手,快速掌握软件的使用方法,轻松开启数字人创作之旅。
119
+ - 多模型支持:支持导入多个模型,并通过一键启动包进行管理,方便用户根据不同的创作需求和应用场景选择合适的模型。
120
+ - 技术支持
121
+ - 声音克隆技术:利用人工智能等先进技术,根据给定的声音样本生成与之相似或相同声音的技术,涵盖语音中的语境、语调、语速等。
122
+ - 自动语音识别:一种能将人类语音中的词汇内容转换为计算机可读输入,也就是转换为文本格式的技术,让计算机能够 “听懂” 人们说的话。
123
+ - 计算机视觉技术:用于视频合成中的视觉处理,包括面部识别、口型分析等,确保虚拟形象的口型与声音和文字内容相匹配。
124
+
125
+ ## 依赖
126
+
127
+ 1. Nodejs 18
128
+ 2. Docker Image
129
+ - docker pull guiji2025/fun-asr
130
+ - docker pull guiji2025/fish-speech-ziming
131
+ - docker pull guiji2025/heygem.ai
132
+
133
+ ## Windows 安装
134
+
135
+ ### 前置条件
136
+
137
+ 1. 必须有 D 盘:主要用于后续数字人、作品等数据存储
138
+ - 空闲空间要求:大于30G
139
+ 2. C 盘:用于存储服务镜像文件
140
+
141
+ - 空闲空间要求:大于 100G
142
+ - 如果不足 100G,可以在安装完成docker后,在下图的位置重新选一个剩余空间大于 100G 的磁盘文件夹。
143
+
144
+ ![output](README_zh.assets/output.png)
145
+
146
+ 3. 系统要求:
147
+ - 目前支持 Windows 10 19042.1526 或更高版本
148
+ 4. 推荐配置:
149
+ - CPU:第13代英特尔酷睿 i5-13400F
150
+ - 内存:32G及以上(必要)
151
+ - 显卡:rtx-4070
152
+ 5. 确保有英伟达显卡,并正确安装显卡驱动(必要)
153
+
154
+ 英伟达驱动下载地址 https://www.nvidia.cn/drivers/lookup/
155
+
156
+ ![nvidia](README_zh.assets/nvidia.png)
157
+
158
+ ### 安装 Windows Docker
159
+
160
+ 1. 用wsl --list --verbose命令可以查看本机有没有安装过wsl,如下图就是已经安装过,无需再安装
161
+
162
+ ![image-20250310111137019](./README_zh.assets/wsl-list.png)
163
+
164
+ > - 安装wsl的命令:`wsl --install`
165
+ > - 由于网络原因,可能失败,多试几次
166
+ > - 安装过程中需要设置新的用户名和密码,设置并记住
167
+
168
+ 2. 用wsl --update更新wsl。
169
+
170
+ ![updatewsl](README_zh.assets/updatewsl.png)
171
+
172
+ 3. [下载 Docker Windows 版](https://www.docker.com/),根据机器 CPU 架构选择不同的安装包。
173
+
174
+ 4. 出现这个界面表示安装成功。
175
+
176
+ ![61eb4c19-3e7a-4791-a266-de4209690cbd](README_zh.assets/61eb4c19-3e7a-4791-a266-de4209690cbd.png)
177
+
178
+ 5. 运行 Docker
179
+
180
+ ![shortcut](README_zh.assets/shortcut.png)
181
+
182
+ 6. 首次运行接受协议和跳过登录
183
+
184
+ ![accept](README_zh.assets/accept.png)
185
+
186
+ ![576746d5-5215-4973-b1ca-c8d7409a6403](README_zh.assets/576746d5-5215-4973-b1ca-c8d7409a6403.png)
187
+
188
+ ![9a10b7b2-1eea-48c1-b7af-34129fe04446](README_zh.assets/9a10b7b2-1eea-48c1-b7af-34129fe04446.png)
189
+
190
+ ### 安装服务端
191
+
192
+ 采用Docker方式安装,docker-compose如下:
193
+
194
+ 1. `docker-compose.yml`文件在`/deploy`目录下。
195
+ 2. 在`/deploy`目录执行`docker-compose up -d`,<u>如果您想使用lite版本,请执行`docker-compose -f docker-compose-lite.yml up -d`</u>
196
+ 3. 耐心等待一段时间(半小时左右,速度取决于网速),下载会消耗70G左右流量,注意连WIFI
197
+ 4. 看到 Docker 中出现三个服务,表示成功了
198
+
199
+ ![e29d1922-7c58-46b4-b1e9-961f853f26d4](README_zh.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png)
200
+
201
+ ### 客户端
202
+
203
+ 1. 直接下载[官方构建的安装包](https://github.com/GuijiAI/HeyGem.ai/releases)
204
+ 2. 双击`HeyGem-x.x.x-setup.exe`即可安装
205
+
206
+ ## Ubuntu22.04 安装
207
+
208
+ ### 推荐配置
209
+
210
+ - CPU:第13代英特尔酷睿 i5-13400F
211
+ - 内存:32G及以上(必要)
212
+ - 显卡:rtx-4070(确保有英伟达显卡,并正确安装显卡驱动)
213
+ - 硬盘:空闲空间大于 100G
214
+
215
+ ### 安装 Docker
216
+
217
+ > 先用`docker --version`检查是否安装了docker,如果安装了,则跳过以下步骤
218
+
219
+ ```bash
220
+ sudo apt update
221
+ sudo apt install docker.io
222
+ sudo apt install docker-compose
223
+ ```
224
+
225
+ ### 安装显卡驱动
226
+
227
+ 1. 参考官方文档安装显卡驱动[https://www.nvidia.cn/drivers/lookup/](https://www.nvidia.cn/drivers/lookup/)
228
+
229
+ > 安装后执行`nvidia-smi`命令,如果显示显卡信息,则安装成功
230
+
231
+ 2. 安装 NVIDIA Container Toolkit
232
+
233
+ NVIDIA Container Toolkit 是 Docker 使用 NVIDIA GPU 的必要工具。安装步骤如下:
234
+ - 添加 NVIDIA 包仓库:
235
+ ```bash
236
+ distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
237
+ && curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | sudo apt-key add - \
238
+ && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
239
+ ```
240
+ - 更新包列表并安装工具包:
241
+ ```bash
242
+ sudo apt-get update
243
+ sudo apt-get install -y nvidia-container-toolkit
244
+ ```
245
+ - 配置 Docker 使用 NVIDIA 运行时:
246
+ ```bash
247
+ sudo nvidia-ctk runtime configure --runtime=docker
248
+ ```
249
+ - 重启 Docker 服务:
250
+ ```bash
251
+ sudo systemctl restart docker
252
+ ```
253
+
254
+ ### 安装服务端
255
+
256
+ ```bash
257
+ cd /deploy
258
+ docker-compose -f docker-compose-linux.yml up -d
259
+ ```
260
+
261
+ > 与 Windows 上拉镜像一样,如果下载太慢,需要指定国内镜像源,方法是在`/etc/docker/daemon.json`文件中添加:
262
+ >
263
+ > ```json
264
+ > {
265
+ > "registry-mirrors": [
266
+ > "https://hub.fast360.xyz",
267
+ > "https://hub.littlediary.cn",
268
+ > "https://docker.kejilion.pro",
269
+ > "https://docker.1panelproxy.com"
270
+ > ]
271
+ > }
272
+ > ```
273
+ > 上面四个镜像源,随着时间推移,可能会有变化,请自行搜索最新的镜像源
274
+
275
+ ### 客户端
276
+
277
+ 1. 直接下载[官方构建的安装包](https://github.com/GuijiAI/HeyGem.ai/releases)的Linux版本
278
+ 2. 双击`HeyGem-x.x.x.AppImage`即可启动,无需安装
279
+
280
+ > 提醒:在Ubuntu系统中,如果您使用`root`用户进入桌面,直接双击`HeyGem-x.x.x.AppImage`可能运行不了,需要在命令行终端中执行`./HeyGem-x.x.x.AppImage --no-sandbox`,加上`--no-sandbox`参数即可。
281
+
282
+ ## 开放 API
283
+
284
+ 我们开放了模特训练和视频合成的API,Docker 启动后会在本地暴露几个端口,通过`http://127.0.0.1`可以调用。
285
+
286
+ 具体代码可以参考
287
+
288
+ - src/main/service/model.js
289
+ - src/main/service/video.js
290
+ - src/main/service/voice.js
291
+
292
+ ### 模特训练
293
+
294
+ 1. 将视频分离为静音视频 + 音频
295
+ 2. 音频放到`D:\heygem_data\voice\data`下
296
+ > `D:\heygem_data\voice\data`是与`guiji2025/fish-speech-ziming`服务约定的,可以在docker-compose中修改
297
+ 3. 调用`http://127.0.0.1:18180/v1/preprocess_and_tran`接口
298
+ > 参数示例:
299
+ >
300
+ > ```json
301
+ > {
302
+ > "format": ".wav",
303
+ > "reference_audio": "xxxxxx/xxxxx.wav",
304
+ > "lang": "zh"
305
+ > }
306
+ > ```
307
+ >
308
+ > 返回示例:
309
+ >
310
+ > ```json
311
+ > {
312
+ > "asr_format_audio_url": "xxxx/x/xxx/xxx.wav",
313
+ > "reference_audio_text": "xxxxxxxxxxxx"
314
+ > }
315
+ > ```
316
+ >
317
+ > **记录下返回结果后续音频合成需要用到**
318
+
319
+ ### 音频合成
320
+
321
+ 接口:`http://127.0.0.1:18180/v1/invoke`
322
+
323
+ ```json
324
+ // 请求参数
325
+ {
326
+ "speaker": "{uuid}", // 一个UUID保持唯一即可
327
+ "text": "xxxxxxxxxx", // 需要合成的文本内容
328
+ "format": "wav", // 固定传参
329
+ "topP": 0.7, // 固定传参
330
+ "max_new_tokens": 1024, // 固定传参
331
+ "chunk_length": 100, // 固定传参
332
+ "repetition_penalty": 1.2, // 固定传参
333
+ "temperature": 0.7, // 固定传参
334
+ "need_asr": false, // 固定传参
335
+ "streaming": false, // 固定传参
336
+ "is_fixed_seed": 0, // 固定传参
337
+ "is_norm": 0, // 固定传参
338
+ "reference_audio": "{voice.asr_format_audio_url}", // 上一步“模特训练”的返回值
339
+ "reference_text": "{voice.reference_audio_text}" // 上一步“模特训练”的返回值
340
+ }
341
+ ```
342
+
343
+ ### 视频合成
344
+
345
+ - 合成接口:`http://127.0.0.1:8383/easy/submit`
346
+
347
+ ```json
348
+ // 请求参数
349
+ {
350
+ "audio_url": "{audioPath}", // 音频路径
351
+ "video_url": "{videoPath}", // 视频路径
352
+ "code": "{uuid}", // 唯一key
353
+ "chaofen": 0, // 固定值
354
+ "watermark_switch": 0, // 固定值
355
+ "pn": 1 // 固定值
356
+ }
357
+ ```
358
+
359
+ - 进度查询:`http://127.0.0.1:8383/easy/query?code=${taskCode}`
360
+ > get 请求,参数`taskCode`是上面合成接口入参中的`code`
361
+
362
+ ## 常见问题
363
+
364
+ [常见问题](./doc/常见问题.md)
365
+
366
+ ## 提问前自查步骤
367
+
368
+ 1. 三个服务是否都是Running状态
369
+
370
+ ![e29d1922-7c58-46b4-b1e9-961f853f26d4](./doc/常见问题.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png)
371
+
372
+ 2. 确认机器上是有英伟达显卡且正确安装了驱动程序。
373
+
374
+ 本项目所有算力都在本地,没有英伟达显卡或没有驱动程序,以上三个服务是启动不了的。
375
+
376
+ 3. 确保服务端和客户端都更新到了最新版本,项目刚开源,社区很活跃,更新也比较频繁,说不定你的问题已经在新版中解决了。
377
+
378
+ - 服务端:到`/deploy`目录下重新执行`docker-compose up -d`
379
+ - 客户端:`pull`代码后重新`build`
380
+
381
+ 4. [GitHub Issues](https://github.com/GuijiAI/HeyGem.ai/issues)持续更新,每天都在解决和关闭问题单,经常看看,也许你的问题已经解决了。
382
+
383
+ ## 提问模板
384
+
385
+ 1. 问题描述
386
+
387
+ 详细描述一下复现步骤,如有截图最好。
388
+
389
+ 2. 提供报错日志
390
+
391
+ - 客户端日志获取方式
392
+
393
+ ![image-20250308205954494](./doc/常见问题.assets/image-20250308205954494.png)
394
+
395
+ - 服务端日志
396
+
397
+ 找到关键位置,或点开我们的三个Docker服务,如下图操作“复制”。
398
+
399
+ ![image-20250308215812201](./doc/常见问题.assets/image-20250308215812201.png)
400
+
401
+ ## 联系我们
402
+
403
+ ```
404
+ James@toolwiz.com
405
+ ```
406
+
407
+ ## 协议
408
+
409
+ [LICENSE](./LICENSE)
410
+
411
+ ## 致谢
412
+
413
+ - ASR 基于 [fun-asr](https://github.com/modelscope/FunASR)
414
+ - TTS 基于 [fish-speech-ziming](https://github.com/fishaudio/fish-speech)
415
+
416
+ ## Star History
417
+
418
+ [![Star History Chart](https://api.star-history.com/svg?repos=GuijiAI/HeyGem.ai&type=Date)](https://www.star-history.com/#GuijiAI/HeyGem.ai&Date)
build/entitlements.mac.plist ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>com.apple.security.cs.allow-jit</key>
6
+ <true/>
7
+ <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
8
+ <true/>
9
+ <key>com.apple.security.cs.allow-dyld-environment-variables</key>
10
+ <true/>
11
+ </dict>
12
+ </plist>
build/icon.icns ADDED
Binary file (7.09 kB). View file
 
build/icon.ico ADDED

Git LFS Details

  • SHA256: e9e0b96d4e26980e7b4957e2db64a4885f6b5fe29fecdd5859690c8292209089
  • Pointer size: 131 Bytes
  • Size of remote file: 411 kB
build/icon.png ADDED
build/logo-outline.ico ADDED

Git LFS Details

  • SHA256: 4e98a605a2fb510289d4331b09f97d05db1d4fdd56df827f3eb528cbb262069f
  • Pointer size: 131 Bytes
  • Size of remote file: 411 kB
deploy/docker-compose-linux.yml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ networks:
2
+ ai_network:
3
+ driver: bridge
4
+
5
+ services:
6
+ heygem-tts:
7
+ image: guiji2025/fish-speech-ziming
8
+ container_name: heygem-tts
9
+ restart: always
10
+ runtime: nvidia
11
+ environment:
12
+ - NVIDIA_VISIBLE_DEVICES=0
13
+ - NVIDIA_DRIVER_CAPABILITIES=compute,graphics,utility,video,display
14
+ ports:
15
+ - '18180:8080'
16
+ volumes:
17
+ - ~/heygem_data/voice/data:/code/data
18
+ command: /bin/bash -c "/opt/conda/envs/python310/bin/python3 tools/api_server.py --listen 0.0.0.0:8080"
19
+ networks:
20
+ - ai_network
21
+ heygem-asr:
22
+ image: guiji2025/fun-asr
23
+ container_name: heygem-asr
24
+ restart: always
25
+ runtime: nvidia
26
+ privileged: true
27
+ working_dir: /workspace/FunASR/runtime
28
+ ports:
29
+ - '10095:10095'
30
+ command: sh /run.sh
31
+ deploy:
32
+ resources:
33
+ reservations:
34
+ devices:
35
+ - driver: nvidia
36
+ count: all
37
+ capabilities: [gpu]
38
+ networks:
39
+ - ai_network
40
+ heygem-gen-video:
41
+ image: guiji2025/heygem.ai
42
+ container_name: heygem-gen-video
43
+ restart: always
44
+ runtime: nvidia
45
+ privileged: true
46
+ volumes:
47
+ - ~/heygem_data/face2face:/code/data
48
+ environment:
49
+ - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
50
+ deploy:
51
+ resources:
52
+ reservations:
53
+ devices:
54
+ - capabilities: [gpu]
55
+ shm_size: '8g'
56
+ ports:
57
+ - '8383:8383'
58
+ command: python /code/app_local.py
59
+ networks:
60
+ - ai_network
deploy/docker-compose-lite.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ networks:
2
+ ai_network:
3
+ driver: bridge
4
+
5
+ services:
6
+ heygem-gen-video:
7
+ image: guiji2025/heygem.ai
8
+ container_name: heygem-gen-video
9
+ restart: always
10
+ runtime: nvidia
11
+ privileged: true
12
+ volumes:
13
+ - d:/heygem_data/face2face:/code/data
14
+ environment:
15
+ - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
16
+ deploy:
17
+ resources:
18
+ reservations:
19
+ devices:
20
+ - capabilities: [gpu]
21
+ shm_size: '8g'
22
+ ports:
23
+ - '8383:8383'
24
+ command: python /code/app_local.py
25
+ networks:
26
+ - ai_network
deploy/docker-compose.yml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ networks:
2
+ ai_network:
3
+ driver: bridge
4
+
5
+ services:
6
+ heygem-tts:
7
+ image: guiji2025/fish-speech-ziming
8
+ container_name: heygem-tts
9
+ restart: always
10
+ runtime: nvidia
11
+ environment:
12
+ - NVIDIA_VISIBLE_DEVICES=0
13
+ - NVIDIA_DRIVER_CAPABILITIES=compute,graphics,utility,video,display
14
+ ports:
15
+ - '18180:8080'
16
+ volumes:
17
+ - d:/heygem_data/voice/data:/code/data
18
+ command: /bin/bash -c "/opt/conda/envs/python310/bin/python3 tools/api_server.py --listen 0.0.0.0:8080"
19
+ networks:
20
+ - ai_network
21
+ heygem-asr:
22
+ image: guiji2025/fun-asr
23
+ container_name: heygem-asr
24
+ restart: always
25
+ runtime: nvidia
26
+ privileged: true
27
+ working_dir: /workspace/FunASR/runtime
28
+ ports:
29
+ - '10095:10095'
30
+ command: sh /run.sh
31
+ deploy:
32
+ resources:
33
+ reservations:
34
+ devices:
35
+ - driver: nvidia
36
+ count: all
37
+ capabilities: [gpu]
38
+ networks:
39
+ - ai_network
40
+ heygem-gen-video:
41
+ image: guiji2025/heygem.ai
42
+ container_name: heygem-gen-video
43
+ restart: always
44
+ runtime: nvidia
45
+ privileged: true
46
+ volumes:
47
+ - d:/heygem_data/face2face:/code/data
48
+ environment:
49
+ - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
50
+ deploy:
51
+ resources:
52
+ reservations:
53
+ devices:
54
+ - capabilities: [gpu]
55
+ shm_size: '8g'
56
+ ports:
57
+ - '8383:8383'
58
+ command: python /code/app_local.py
59
+ networks:
60
+ - ai_network
dev-app-update.yml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ provider: generic
2
+ url: https://example.com/auto-updates
3
+ updaterCacheDirName: heygem-updater
doc/常见问题.assets/2025-03-25_09-36-07.jpg ADDED

Git LFS Details

  • SHA256: 41a14cd994ef211918edca438f7a68872edc6729d4d78397b4d0c7830c2468bd
  • Pointer size: 131 Bytes
  • Size of remote file: 247 kB
doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/3074702f4d2eefb2faf1df3e1cb9cf2.png ADDED
doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png ADDED
doc/常见问题.assets/image-20250308205954494.png ADDED

Git LFS Details

  • SHA256: 9d86a530f1220bbc5dcdc5d707c0b0cb0e0559aa27c873e9a7d778f65fca38a2
  • Pointer size: 131 Bytes
  • Size of remote file: 106 kB
doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/image-20250308212642892.png ADDED
doc/常见问题.assets/image-20250308213957568.png ADDED

Git LFS Details

  • SHA256: aec12c9382d686e8dc55f1e61f8f6275b1da09afa3362a315697493674f5eee1
  • Pointer size: 131 Bytes
  • Size of remote file: 178 kB
doc/常见问题.assets/image-20250308215812201.png ADDED

Git LFS Details

  • SHA256: d57bc37ae889040222fb46f4c4a2f0f679ed9f79f3ce19a62b14c60623111299
  • Pointer size: 131 Bytes
  • Size of remote file: 308 kB
doc//345/270/270/350/247/201/351/227/256/351/242/230.assets/image-20250311143803466.png ADDED
doc/常见问题.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Heygem
2
+
3
+ ## 自查步骤
4
+
5
+ 1. 三个服务是否都是Running状态
6
+
7
+ ![e29d1922-7c58-46b4-b1e9-961f853f26d4](常见问题.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png)
8
+
9
+ 2. 确认机器上是有英伟达显卡且正确安装了驱动程序。
10
+
11
+ 本项目所有算力都在本地,没有英伟达显卡或没有驱动程序,以上三个服务是启动不了的。
12
+
13
+ 3. 确保服务端和客户端都更新到了最新版本,项目刚开源,社区很活跃,更新也比较频繁,说不定你的问题已经在新版中解决了。
14
+
15
+ - 服务端:到`/deploy`目录下重新执行`docker-compose up -d`
16
+ - 客户端:`pull`代码后重新`build`
17
+
18
+ 4. [GitHub Issues](https://github.com/GuijiAI/HeyGem.ai/issues)持续更新,每天都在解决和关闭问题单,经常看看,也许你的问题已经解决了。
19
+
20
+ ## 提问模板
21
+
22
+ 1. 问题描述
23
+
24
+ 详细描述一下复现步骤,如有截图最好。
25
+
26
+ 2. 提供报错日志
27
+
28
+ - 客户端日志获取方式
29
+
30
+ ![image-20250308205954494](常见问题.assets/image-20250308205954494.png)
31
+
32
+ - 服务端日志
33
+
34
+ 找到关键位置,或点开我们的三个Docker服务,如下图操作“复制”。
35
+
36
+ ![image-20250308215812201](常见问题.assets/image-20250308215812201.png)
37
+
38
+ ## 常见问题
39
+
40
+ 1. 执行`docker-compose up -d`连接失败,报错如下:
41
+ ```shell
42
+ docker-compose up -d
43
+ [+] Running 3/3
44
+ ✘ heygem-asr Error Get "https://registry-1.docker.io/v2/": net/http: request canceled while ... 15.1s
45
+ ✘ heygem-gen-video Error context canceled 15.1s
46
+ ✘ heygem-tts Error context canceled 15.1s
47
+ Error response from daemon: Get "https://registry-1.docker.io/v2/": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
48
+ ```
49
+
50
+ ![image-20250308212642892](常见问题.assets/image-20250308212642892.png)
51
+
52
+ - Docker Hub 官方源连接不稳定,您需要打开VPN的全局模式
53
+
54
+ - 或者使用国内镜像源,如下图设置
55
+
56
+ ![2025-03-25_09-36-07](./常见问题.assets/2025-03-25_09-36-07.jpg)
57
+ ```json
58
+ {
59
+ "builder": {
60
+ "gc": {
61
+ "defaultKeepStorage": "20GB",
62
+ "enabled": true
63
+ }
64
+ },
65
+ "experimental": false,
66
+ "registry-mirrors": [
67
+ "https://docker.zhai.cm",
68
+ "https://a.ussh.net",
69
+ "https://hub.littlediary.cn",
70
+ "https://hub.rat.dev",
71
+ "https://atomhub.openatom.cn",
72
+ "https://docker.m.daocloud.io",
73
+ "https://docker.1ms.run"
74
+ ]
75
+ }
76
+ ```
77
+
78
+ 2. 新增模特时报错如下图:
79
+
80
+ ![3074702f4d2eefb2faf1df3e1cb9cf2](常见问题.assets/3074702f4d2eefb2faf1df3e1cb9cf2.png)
81
+
82
+ - 用于创建模特的视频必须有声音,且是人在说话,程序需要用这个声音来做声音克隆
83
+
84
+ 3. heygem-tts 一直重启
85
+
86
+ https://github.com/GuijiAI/HeyGem.ai/issues/69
87
+
88
+ 4. 定制模特报错 Connection refused
89
+
90
+ 日志报错如下:
91
+ ```shell
92
+ 2025-03-13 14:38:34.476 | INFO | util.wav_util:format_wav:128 - wav标准格式化成功,/code/data/origin_audio/denoise_20250313223834179.wav -> /code/data/origin_audio/format_denoise_20250313223834179.wav
93
+ 2025-03-13 14:38:34.478 | INFO | util.wav_util:clean_wav:156 - 音频清理完成,新文件路径:/code/data/origin_audio/denoise_20250313223834179.wav
94
+ 2025-03-13 14:38:34.484 | INFO | util.wav_util:split_audio:95 - 原始音频小于20s,不在分割,直接返回, 时长为:10.587, 文件:/code/data/origin_audio/format_denoise_20250313223834179.wav
95
+ 2025-03-13 14:38:34.484 | INFO | asr_fun:asr:102 - fun asr start, wav_path:/code/data/origin_audio/format_denoise_20250313223834179.wav
96
+ 2025-03-13 14:38:34.487 | INFO | asr_fun:init_conn:40 - connect to url
97
+ 2025-03-13 14:38:34.487 | WARNING | asr_fun:init_conn:53 - 建立funasr连接异常:[Errno 111] Connection refused
98
+ Traceback (most recent call last):
99
+ File "/code/asr_fun.py", line 41, in init_conn
100
+ self.websocket = create_connection(uri, ssl=ssl_context, sslopt=ssl_opt)
101
+ File "/opt/conda/envs/python310/lib/python3.10/site-packages/websocket/_core.py", line 646, in create_connection
102
+ websock.connect(url, **options)
103
+ File "/opt/conda/envs/python310/lib/python3.10/site-packages/websocket/_core.py", line 256, in connect
104
+ self.sock, addrs = connect(
105
+ File "/opt/conda/envs/python310/lib/python3.10/site-packages/websocket/_http.py", line 145, in connect
106
+ sock = _open_socket(addrinfo_list, options.sockopt, options.timeout)
107
+ File "/opt/conda/envs/python310/lib/python3.10/site-packages/websocket/_http.py", line 232, in _open_socket
108
+ raise err
109
+ File "/opt/conda/envs/python310/lib/python3.10/site-packages/websocket/_http.py", line 209, in _open_socket
110
+ sock.connect(address)
111
+ ConnectionRefusedError: [Errno 111] Connection refused
112
+ 2025-03-13 14:38:43.357 | ERROR | tools.server.views_guiji:api_do_preprocess:118 - An error occurred: 'NoneType' object has no attribute 'send'
113
+ Stack trace:
114
+ Traceback (most recent call last):
115
+ File "/code/tools/server/views_guiji.py", line 105, in api_do_preprocess
116
+ rt = generate_reference_info(tts_item)
117
+ File "/code/tools/server/views_guiji.py", line 76, in generate_reference_info
118
+ text = asr_fun.asr(a_file)
119
+ File "/code/asr_fun.py", line 120, in asr
120
+ result: dict = rcg.close(timeout=3)
121
+ File "/code/asr_fun.py", line 86, in close
122
+ self.websocket.send(message)
123
+ AttributeError: 'NoneType' object has no attribute 'send'
124
+
125
+ 2025-03-13 14:38:43.357 | INFO | tools.server.views_guiji:api_do_preprocess:121 - 训练返回结果:{'code': -1, 'msg': "'NoneType' object has no attribute 'send'"}
126
+ ```
127
+
128
+ 是因为heygem-asr启动比较慢,服务端启动后等几分钟再进行克隆形象操作。如果机器内存太小(比如16G),可能启动不了。
electron-builder.yml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ appId: com.heygem.app
2
+ productName: HeyGem
3
+ directories:
4
+ buildResources: build
5
+ files:
6
+ - '!**/.vscode/*'
7
+ - '!src/*'
8
+ - '!electron.vite.config.{js,ts,mjs,cjs}'
9
+ - '!{.eslintignore,.eslintrc.cjs,.prettierignore,.prettierrc.yaml,dev-app-update.yml,CHANGELOG.md,README.md}'
10
+ - '!{.env,.env.*,.npmrc,pnpm-lock.yaml}'
11
+ asarUnpack:
12
+ - resources/**
13
+ win:
14
+ executableName: HeyGem
15
+ nsis:
16
+ oneClick: false
17
+ allowElevation: true
18
+ perMachine: true
19
+ allowToChangeInstallationDirectory: true
20
+ artifactName: ${productName}-${version}-setup.${ext}
21
+ shortcutName: ${productName}
22
+ uninstallDisplayName: ${productName}
23
+ createDesktopShortcut: always
24
+ installerIcon: build/icon.ico
25
+ uninstallerIcon: build/icon.ico
26
+ mac:
27
+ entitlementsInherit: build/entitlements.mac.plist
28
+ extendInfo:
29
+ - NSCameraUsageDescription: Application requests access to the device's camera.
30
+ - NSMicrophoneUsageDescription: Application requests access to the device's microphone.
31
+ - NSDocumentsFolderUsageDescription: Application requests access to the user's Documents folder.
32
+ - NSDownloadsFolderUsageDescription: Application requests access to the user's Downloads folder.
33
+ notarize: false
34
+ dmg:
35
+ artifactName: ${productName}-${version}.${ext}
36
+ linux:
37
+ target:
38
+ - AppImage
39
+ # - snap
40
+ # - deb
41
+ maintainer: electronjs.org
42
+ category: Utility
43
+ icon: build/icon.png
44
+ desktop:
45
+ Name: ${productName}
46
+ Comment: ${productName}
47
+ GenericName: ${productName}
48
+ executableName: ${productName}
49
+ Icon: build/icon.png
50
+ Terminal: false
51
+ Type: Application
52
+ Categories: Utility
53
+ Keywords:
54
+ - ${productName}
55
+ - 数字人
56
+ - 视频生成
57
+ appImage:
58
+ artifactName: ${productName}-${version}.${ext}
59
+ npmRebuild: false
60
+ publish:
61
+ provider: generic
62
+ url: https://example.com/auto-updates
63
+ electronDownload:
64
+ mirror: https://npmmirror.com/mirrors/electron/
electron.vite.config.mjs ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { resolve } from 'path'
2
+ import { defineConfig, externalizeDepsPlugin } from 'electron-vite'
3
+ import vue from '@vitejs/plugin-vue'
4
+ export default defineConfig({
5
+ main: {
6
+ plugins: [externalizeDepsPlugin()]
7
+ },
8
+ preload: {
9
+ plugins: [externalizeDepsPlugin()]
10
+ },
11
+ renderer: {
12
+ resolve: {
13
+ alias: {
14
+ '@renderer': resolve('src/renderer/src')
15
+ }
16
+ },
17
+ plugins: [vue()]
18
+ }
19
+ })
jsconfig.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "baseUrl": ".",
4
+ "paths": {
5
+ "@renderer/*": ["src/renderer/src/*"]
6
+ }
7
+ }
8
+ }
package-lock.json ADDED
The diff for this file is too large to render. See raw diff