import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline
import numpy as np
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Remote chat model: Zephyr-7B-beta served through the HF Inference API.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Initialize Bark TTS model
# Loading is best-effort: if the pipeline cannot be created (missing weights,
# no network, etc.) the app still runs in text-only mode.
try:
    synthesizer = pipeline("text-to-speech", "suno/bark")
    tts_available = True
except Exception as e:
    print(f"TTS model failed to load: {e}")
    # Checked by generate_speech()/respond_with_audio() before synthesizing.
    tts_available = False
    synthesizer = None
def generate_speech(text):
    """Synthesize spoken audio for *text* with the Bark TTS pipeline.

    Returns a ``(sample_rate, samples)`` pair on success. On failure the
    first element is ``None`` and the second is a human-readable error
    message (callers distinguish the cases by truthiness of the first slot).
    """
    if not (tts_available and synthesizer):
        return None, "TTS not available"
    try:
        result = synthesizer(text, forward_params={"do_sample": True})
        # Flatten to a 1-D waveform so Gradio's Audio component accepts it.
        samples = result["audio"].flatten()
        rate = result["sampling_rate"]
    except Exception as e:
        return None, f"TTS Error: {str(e)}"
    return rate, samples
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from the Zephyr inference endpoint.

    Builds an OpenAI-style message list from the system prompt, prior
    (user, assistant) turns, and the new user message, then yields the
    accumulated assistant response after each streamed chunk so the
    Gradio chatbot renders incrementally.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    response = ""
    # FIX: the loop variable was named `message`, shadowing the user-message
    # parameter above; renamed to `chunk` to avoid the shadowing hazard.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # keep-alive/final chunks may carry None content
            response += token
        yield response
def respond_with_audio(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    enable_tts
):
    """Stream the chat reply and, optionally, attach Bark TTS audio.

    Yields ``(text, audio)`` pairs: ``audio`` is ``None`` while the text is
    streaming, and becomes a ``(sample_rate, samples)`` tuple on the final
    yield when TTS is enabled and synthesis succeeds.
    """
    final_response = ""
    for partial in respond(message, history, system_message, max_tokens, temperature, top_p):
        final_response = partial
        yield partial, None  # stream text first; audio is attached at the end

    # Guard clause: no audio requested/possible -> emit the final text only.
    if not (enable_tts and tts_available and final_response.strip()):
        yield final_response, None
        return

    try:
        # Clean response for TTS (remove markdown, keep essential punctuation)
        clean_text = final_response.replace("*", "").replace("#", "").replace("`", "")
        # Limit length for TTS (Bark works best with shorter texts)
        if len(clean_text) > 500:
            clean_text = clean_text[:500] + "..."
        sample_rate, audio_data = generate_speech(clean_text)
        if sample_rate:
            yield final_response, (sample_rate, audio_data)
        else:
            yield final_response, None
    except Exception as e:
        print(f"TTS generation failed: {e}")
        yield final_response, None
# Create the main chat interface with TTS option.
# Layout: chat pane (left), TTS controls (right), collapsible model settings,
# then event wiring and example prompts.
with gr.Blocks(title="Chat + TTS Bot") as demo:
    gr.Markdown("# 🤖 Chat Bot with Text-to-Speech")
    gr.Markdown("Chat with Zephyr-7B and optionally hear responses with Bark TTS")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(
                placeholder="Type your message here...",
                label="Message",
                lines=2
            )
            with gr.Row():
                submit = gr.Button("💬 Send", variant="primary")
                clear = gr.Button("🗑️ Clear")
        with gr.Column(scale=1):
            # TTS Controls
            gr.Markdown("### 🔊 Text-to-Speech")
            enable_tts = gr.Checkbox(
                label="Enable TTS for responses",
                value=False,
                info="Generate audio for bot responses"
            )
            audio_output = gr.Audio(
                label="Response Audio",
                autoplay=False,
                visible=True
            )
            # Manual TTS
            gr.Markdown("### 🎤 Manual TTS")
            tts_input = gr.Textbox(
                placeholder="Enter text to convert to speech...",
                label="Text for TTS",
                lines=2
            )
            tts_button = gr.Button("🗣️ Generate Speech")
    # Chat Settings (Collapsible)
    with gr.Accordion("⚙️ Chat Settings", open=False):
        system_message = gr.Textbox(
            value="You are a friendly and helpful AI assistant.",
            label="System Message",
            lines=2
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p"
            )
    # NOTE(review): this State is never wired to any event handler — the chat
    # history lives in the Chatbot component itself. Kept for compatibility.
    chat_history = gr.State([])

    def user_message(message, history):
        """Append the user's message to the chat and clear the textbox."""
        return "", history + [[message, None]]

    def bot_response(history, system_msg, max_tok, temp, top_p, tts_enabled):
        """Stream the assistant reply into the last history row, with optional TTS audio."""
        if not history or not history[-1][0]:
            # FIX: the original used `return history, None` inside a generator;
            # a generator's return value is discarded by Gradio, so the outputs
            # were never delivered. Yield the (unchanged) outputs instead.
            yield history, None
            return
        user_msg = history[-1][0]
        # Generate response; each iteration updates the pending assistant turn.
        for response, audio in respond_with_audio(
            user_msg,
            history[:-1],
            system_msg,
            max_tok,
            temp,
            top_p,
            tts_enabled
        ):
            history[-1][1] = response
            yield history, audio

    def manual_tts(text):
        """Generate TTS for manually entered text; returns None when nothing playable."""
        if not text.strip():
            return None
        sample_rate, audio_data = generate_speech(text)
        # FIX: on failure generate_speech returns (None, "<error string>"),
        # which gr.Audio cannot render — map failures to None instead.
        return (sample_rate, audio_data) if sample_rate else None

    # Event handlers: Enter key and Send button share the same two-step flow
    # (echo the user message immediately, then stream the bot response).
    msg.submit(
        user_message,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_tts],
        [chatbot, audio_output]
    )
    submit.click(
        user_message,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_tts],
        [chatbot, audio_output]
    )
    clear.click(lambda: ([], None), outputs=[chatbot, audio_output])
    tts_button.click(
        manual_tts,
        inputs=[tts_input],
        outputs=[audio_output]
    )
    # Add examples
    gr.Examples(
        examples=[
            ["Hello! How are you today?"],
            ["Tell me a short joke [laughs]"],
            ["Explain quantum physics in simple terms"],
            ["What's the weather like? [sighs]"]
        ],
        inputs=[msg],
        label="Example messages (try the ones with [laughs] or [sighs] for TTS effects!)"
    )
if __name__ == "__main__":
    demo.launch()