# Hugging Face Space: AI Assistant (Chat + Voice + Image)
# NOTE(review): the captured Space status banner read "Runtime error";
# the page-residue lines were converted to this comment header.
import os
import tempfile

import gradio as gr
import requests
import soundfile as sf
import torch
from datasets import load_dataset
from groq import Groq
from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
# ==============================
# API KEYS
# ==============================
# Both keys come from the environment (Space secrets); os.getenv returns
# None when unset, so a missing key surfaces later at first API call.
HF_TOKEN = os.getenv("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

groq_client = Groq(api_key=GROQ_API_KEY)

# ==============================
# LOAD TTS MODELS
# ==============================
# SpeechT5 text-to-speech pipeline: tokenizer/processor, acoustic model,
# and the HiFi-GAN vocoder that turns spectrograms into a waveform.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# A fixed speaker identity taken from the CMU ARCTIC x-vector set;
# index 7306 selects one particular voice. Shape becomes (1, 512).
embeddings_dataset = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
xvector = embeddings_dataset[7306]["xvector"]
speaker_embeddings = torch.tensor(xvector).unsqueeze(0)
# ==============================
# TEXT -> SPEECH
# ==============================
def text_to_speech(text):
    """Synthesize *text* into a 16 kHz mono WAV file and return its path.

    Uses the module-level SpeechT5 processor/model/vocoder and the fixed
    speaker embedding. The temp file is created with delete=False so the
    path stays valid for Gradio to serve after this function returns.

    Args:
        text: The text to speak (truncated to 500 tokens).

    Returns:
        Filesystem path of the generated .wav file.
    """
    inputs = processor(
        text=text,
        return_tensors="pt",
        truncation=True,
        max_length=500,  # SpeechT5 input cap; longer prompts are truncated
    )
    # Inference only — skip autograd bookkeeping (less memory, faster).
    with torch.no_grad():
        speech = tts_model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings,
            vocoder=vocoder,
        )
    # Fix: the original leaked the open file handle. Close it (via the
    # context manager) before sf.write reopens the path — also avoids a
    # sharing violation on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        audio_path = tmp.name
    sf.write(audio_path, speech.numpy(), samplerate=16000)
    return audio_path
# ==============================
# IMAGE GENERATION
# ==============================
def generate_image(prompt):
    """Generate an image for *prompt* via the HF Inference router (SDXL).

    Args:
        prompt: Text prompt forwarded as-is to the model.

    Returns:
        Path to a temporary .png file, or None on any failure (non-200
        status or a network error) — errors are logged to stdout so the
        Space logs show what happened.
    """
    API_URL = "https://router.huggingface.co/hf-inference/models/stabilityai/stable-diffusion-xl-base-1.0"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}"
    }
    payload = {
        "inputs": prompt
    }
    try:
        # Fix: the original had no timeout, so a stalled request would hang
        # the Gradio handler indefinitely. Image generation is slow, hence
        # the generous cap.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    except requests.RequestException as err:
        # Follow the block's existing error convention: log and return None
        # instead of crashing the whole assistant callback.
        print("HF REQUEST FAILED:", err)
        return None
    print("HF STATUS:", response.status_code)
    if response.status_code != 200:
        print(response.text)
        return None
    # delete=False keeps the file on disk for Gradio to serve; the context
    # manager closes the handle after the bytes are flushed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(response.content)
        return tmp.name
# ==============================
# GROQ CHATBOT
# ==============================
def ask_llm(question):
    """Send a single-turn question to Groq's Llama 3.1 8B and return the reply.

    Args:
        question: The user's message (no conversation history is kept).

    Returns:
        The assistant's reply text (capped at 200 completion tokens).
    """
    conversation = [{"role": "user", "content": question}]
    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",
        max_tokens=200,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# ==============================
# MAIN ASSISTANT FUNCTION
# ==============================
def ai_assistant(user_input):
    """Run one assistant turn: text reply, spoken reply, optional image.

    The image branch is a simple keyword heuristic — if the request
    mentions "image" or "generate", the raw input is also sent to the
    image model.

    Args:
        user_input: The user's message.

    Returns:
        Tuple of (reply text, path to WAV audio, image path or None).
    """
    reply = ask_llm(user_input)

    lowered = user_input.lower()
    wants_image = "image" in lowered or "generate" in lowered
    image = generate_image(user_input) if wants_image else None

    audio = text_to_speech(reply)
    return reply, audio, image
# ==============================
# GRADIO UI
# ==============================
# One text input feeds ai_assistant; its three outputs map to the three
# output components below.
with gr.Blocks() as demo:
    # Fix: the header emoji was mojibake ("π€") in the captured source;
    # restored to a robot emoji — confirm against the original file.
    gr.Markdown("# 🤖 AI Assistant (Chat + Voice + Image)")

    user_input = gr.Textbox(
        label="Ask something or request an image"
    )
    text_output = gr.Textbox(
        label="Assistant Response"
    )
    audio_output = gr.Audio(
        label="Voice Response"
    )
    image_output = gr.Image(
        label="Generated Image"
    )

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        fn=ai_assistant,
        inputs=user_input,
        outputs=[text_output, audio_output, image_output],
    )

demo.launch()