# Hugging Face Space UI residue ("Spaces: Sleeping") removed — it was page-scrape
# metadata, not part of the application source.
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline
import numpy as np

# For more information on `huggingface_hub` Inference API support, see:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
# (the original had this as a bare string literal after the imports, where it
# is not a module docstring — a comment is the correct form here)
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Initialize the Bark TTS model. Loading can fail (missing weights, no GPU,
# incompatible transformers version), so degrade gracefully: the app still
# serves text chat with TTS disabled.
try:
    synthesizer = pipeline("text-to-speech", "suno/bark")
    tts_available = True
except Exception as e:
    print(f"TTS model failed to load: {e}")
    tts_available = False
    synthesizer = None
def generate_speech(text):
    """Generate speech audio from *text* using the Bark TTS pipeline.

    Args:
        text: Plain text to synthesize.

    Returns:
        ``(sample_rate, audio_data)`` on success, where ``audio_data`` is a
        flat 1-D array, or ``(None, error_message)`` when the TTS model is
        unavailable or synthesis raises. Callers must check the first element
        before treating the result as audio.
    """
    if not tts_available or synthesizer is None:
        return None, "TTS not available"
    try:
        speech = synthesizer(text, forward_params={"do_sample": True})
        # Flatten to the (sample_rate, 1-D array) shape gr.Audio expects.
        audio_data = speech["audio"].flatten()
        sample_rate = speech["sampling_rate"]
        return sample_rate, audio_data
    except Exception as e:
        return None, f"TTS Error: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from the Zephyr model.

    Args:
        message: The latest user message.
        history: Prior ``(user, assistant)`` message pairs; empty strings or
            ``None`` entries are skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # Loop variable renamed: the original used `message`, shadowing the
    # `message` parameter above.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response
def respond_with_audio(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    enable_tts,
):
    """Stream the chat response, then optionally append TTS audio.

    Args:
        message: The latest user message.
        history: Prior ``(user, assistant)`` message pairs.
        system_message: System prompt for the model.
        max_tokens: Maximum tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        enable_tts: When true (and the TTS model loaded), synthesize the
            final response to audio.

    Yields:
        ``(response_text, audio)`` tuples. ``audio`` is ``None`` while text
        is streaming; the final yield carries ``(sample_rate, data)`` when
        TTS succeeds, otherwise ``None``.
    """
    final_response = ""
    for partial in respond(message, history, system_message, max_tokens, temperature, top_p):
        final_response = partial
        # Text streams first; audio (if any) arrives with the final yield.
        yield partial, None

    if enable_tts and tts_available and final_response.strip():
        try:
            # Strip markdown punctuation that reads poorly when spoken.
            clean_text = final_response.replace("*", "").replace("#", "").replace("`", "")
            # Bark works best on short inputs; truncate long responses.
            if len(clean_text) > 500:
                clean_text = clean_text[:500] + "..."
            sample_rate, audio_data = generate_speech(clean_text)
            if sample_rate:
                yield final_response, (sample_rate, audio_data)
            else:
                yield final_response, None
        except Exception as e:
            print(f"TTS generation failed: {e}")
            yield final_response, None
    else:
        yield final_response, None
# Build the main chat interface with an optional TTS panel.
with gr.Blocks(title="Chat + TTS Bot") as demo:
    gr.Markdown("# 🤖 Chat Bot with Text-to-Speech")
    gr.Markdown("Chat with Zephyr-7B and optionally hear responses with Bark TTS")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(
                placeholder="Type your message here...",
                label="Message",
                lines=2,
            )
            with gr.Row():
                submit = gr.Button("💬 Send", variant="primary")
                clear = gr.Button("🗑️ Clear")
        with gr.Column(scale=1):
            # TTS controls for bot responses
            gr.Markdown("### 🔊 Text-to-Speech")
            enable_tts = gr.Checkbox(
                label="Enable TTS for responses",
                value=False,
                info="Generate audio for bot responses",
            )
            audio_output = gr.Audio(
                label="Response Audio",
                autoplay=False,
                visible=True,
            )
            # Standalone text-to-speech, independent of the chat
            gr.Markdown("### 🎤 Manual TTS")
            tts_input = gr.Textbox(
                placeholder="Enter text to convert to speech...",
                label="Text for TTS",
                lines=2,
            )
            tts_button = gr.Button("🗣️ Generate Speech")

    # Generation settings (collapsed by default)
    with gr.Accordion("⚙️ Chat Settings", open=False):
        system_message = gr.Textbox(
            value="You are a friendly and helpful AI assistant.",
            label="System Message",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p",
            )

    # NOTE(review): the original also created an unused `chat_history = gr.State([])`;
    # history lives in the Chatbot component, so the State has been removed.

    def user_message(message, history):
        """Append the user's message to the chat and clear the input box."""
        return "", history + [[message, None]]

    def bot_response(history, system_msg, max_tok, temp, top_p, tts_enabled):
        """Stream the bot's reply into the last history slot, with optional audio."""
        if not history or not history[-1][0]:
            # This function is a generator: the original's `return history, None`
            # here raised StopIteration without ever sending Gradio an update.
            # Yield the unchanged state instead, then stop.
            yield history, None
            return
        user_msg = history[-1][0]
        for response, audio in respond_with_audio(
            user_msg,
            history[:-1],
            system_msg,
            max_tok,
            temp,
            top_p,
            tts_enabled,
        ):
            history[-1][1] = response
            yield history, audio

    def manual_tts(text):
        """Generate TTS audio for manually entered text, or None on failure."""
        if not text.strip():
            return None
        sample_rate, audio_data = generate_speech(text)
        # generate_speech returns (None, error_message) on failure; the
        # original passed that tuple straight to gr.Audio, which cannot
        # render it. Map failures to None instead.
        if sample_rate is None:
            return None
        return sample_rate, audio_data

    # Event wiring: first append the user turn, then stream the bot turn.
    msg.submit(
        user_message,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_tts],
        [chatbot, audio_output],
    )
    submit.click(
        user_message,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, enable_tts],
        [chatbot, audio_output],
    )
    clear.click(lambda: ([], None), outputs=[chatbot, audio_output])
    tts_button.click(
        manual_tts,
        inputs=[tts_input],
        outputs=[audio_output],
    )

    # Example prompts; [laughs]/[sighs] are Bark non-speech cues.
    gr.Examples(
        examples=[
            ["Hello! How are you today?"],
            ["Tell me a short joke [laughs]"],
            ["Explain quantum physics in simple terms"],
            ["What's the weather like? [sighs]"],
        ],
        inputs=[msg],
        label="Example messages (try the ones with [laughs] or [sighs] for TTS effects!)",
    )

if __name__ == "__main__":
    demo.launch()