almost 02 version
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ from huggingface_hub import InferenceClient
|
|
| 3 |
|
| 4 |
|
| 5 |
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
|
| 6 |
-
#client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")
|
| 7 |
|
| 8 |
|
| 9 |
def generate_text(messages):
|
|
@@ -13,12 +12,11 @@ def generate_text(messages):
|
|
| 13 |
for token in client.chat_completion(messages, max_tokens=100,stream=True):
|
| 14 |
content = (token.choices[0].delta.content)
|
| 15 |
generated += content
|
| 16 |
-
#print(content)
|
| 17 |
-
#print(''.join(list(content)))
|
| 18 |
yield generated
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
def call_generate_text(message, history):
|
| 24 |
#if len(message) == 0:
|
|
@@ -35,7 +33,7 @@ def call_generate_text(message, history):
|
|
| 35 |
text_generator = generate_text(messages)
|
| 36 |
|
| 37 |
for text_chunk in text_generator:
|
| 38 |
-
print(f"chunk={text_chunk}")
|
| 39 |
assistant_message["content"] = text_chunk
|
| 40 |
updated_history = messages + [assistant_message]
|
| 41 |
yield "", updated_history
|
|
@@ -47,19 +45,28 @@ def call_generate_text(message, history):
|
|
| 47 |
head = '''
|
| 48 |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
|
| 49 |
<script type="module">
|
| 50 |
-
import {
|
| 51 |
-
|
|
|
|
| 52 |
</script>
|
| 53 |
'''
|
| 54 |
|
| 55 |
with gr.Blocks(title="LLM with TTS",head=head) as demo:
|
| 56 |
-
gr.Markdown("
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
js = """
|
| 61 |
-
function(chatbot){
|
| 62 |
-
window.
|
| 63 |
//auto scroll
|
| 64 |
var chatElement = document.getElementById('gr-chatbot');
|
| 65 |
chatElement.scrollTop = chatElement.scrollHeight;
|
|
@@ -100,7 +107,7 @@ with gr.Blocks(title="LLM with TTS",head=head) as demo:
|
|
| 100 |
import os
|
| 101 |
remote_dir ="/home/user/app/"
|
| 102 |
local_dir = "C:\\Users\\owner\\Documents\\pythons\\huggingface\\mistral-7b-v0.3-matcha-tts-en" #sorry, this is my development environment
|
| 103 |
-
|
| 104 |
# set not dir but file
|
| 105 |
#demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx")])
|
| 106 |
-
demo.launch(allowed_paths=[os.path.join(local_dir,"models","ljspeech_sim.onnx")])
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def generate_text(messages):
|
|
|
|
| 12 |
for token in client.chat_completion(messages, max_tokens=100,stream=True):
|
| 13 |
content = (token.choices[0].delta.content)
|
| 14 |
generated += content
|
|
|
|
|
|
|
| 15 |
yield generated
|
| 16 |
+
|
| 17 |
+
last = generated[-1]
|
| 18 |
+
if last not in [",",".","!","?"]:
|
| 19 |
+
yield generated+"," #no stream version
|
| 20 |
|
| 21 |
def call_generate_text(message, history):
|
| 22 |
#if len(message) == 0:
|
|
|
|
| 33 |
text_generator = generate_text(messages)
|
| 34 |
|
| 35 |
for text_chunk in text_generator:
|
| 36 |
+
#print(f"chunk={text_chunk}")
|
| 37 |
assistant_message["content"] = text_chunk
|
| 38 |
updated_history = messages + [assistant_message]
|
| 39 |
yield "", updated_history
|
|
|
|
| 45 |
head = '''
|
| 46 |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
|
| 47 |
<script type="module">
|
| 48 |
+
import { matccha_tts_onnx_env ,matcha_tts_raw_env} from "http://localhost:8000/js-esm/matcha_tts_onnx_en.js";
|
| 49 |
+
matccha_tts_onnx_env.matcha_tts_model_path = "/file=models/ljspeech_sim.onnx"
|
| 50 |
+
matcha_tts_raw_env.maxInputLength = 140 //if "Device removed reason: DXGI_ERROR_DEVICE_HUNG" happens, reduce this to half
|
| 51 |
</script>
|
| 52 |
'''
|
| 53 |
|
| 54 |
with gr.Blocks(title="LLM with TTS",head=head) as demo:
|
| 55 |
+
gr.Markdown("""
|
| 56 |
+
## Warnings
|
| 57 |
+
- Don't listen at high volume or with headphones until you confirm your machine can play audio
|
| 58 |
+
- Sometimes the GPU crashes because of maxInputLength; if it crashes, let me know along with your GPU info
|
| 59 |
+
## Notice
|
| 60 |
+
- LLM is unstable: The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.
|
| 61 |
+
- TTS takes a long time to load: Please be patient, the first response may be delayed by more than 40 seconds while loading.
|
| 62 |
+
|
| 63 |
+
""")
|
| 64 |
+
|
| 65 |
+
gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech** - LLM and TTS models will change without notice.")
|
| 66 |
|
| 67 |
js = """
|
| 68 |
+
async function(chatbot){
|
| 69 |
+
await window.matcha_tts_update_chatbot(chatbot)
|
| 70 |
//auto scroll
|
| 71 |
var chatElement = document.getElementById('gr-chatbot');
|
| 72 |
chatElement.scrollTop = chatElement.scrollHeight;
|
|
|
|
| 107 |
import os
|
| 108 |
remote_dir ="/home/user/app/"
|
| 109 |
local_dir = "C:\\Users\\owner\\Documents\\pythons\\huggingface\\mistral-7b-v0.3-matcha-tts-en" #sorry, this is my development environment
|
| 110 |
+
|
| 111 |
# set not dir but file
|
| 112 |
#demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx")])
|
| 113 |
+
demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx"),os.path.join(local_dir,"models","ljspeech_sim.onnx")])
|