| import gradio as gr |
| import edge_tts |
| import asyncio |
| import tempfile |
| import os |
| from huggingface_hub import InferenceClient |
| import random |
| import torch |
|
|
# Default language code for the assistant (currently unused in this file —
# TODO confirm whether transcription/TTS should consume it).
default_lang = "en"
|
|
| |
def transcribe(audio):
    """Convert recorded audio into text.

    Placeholder implementation: the ``audio`` argument is ignored and a
    fixed string is returned until a real speech-to-text backend is wired in.
    """
    return "Transcribed text from audio."
|
|
| |
def client_fn(model):
    """Return an InferenceClient for the model selected in the UI.

    The original implementation ignored ``model`` and always returned the
    Mixtral client even though the dropdown offers four choices. Unknown
    names still fall back to Mixtral, preserving the previous behavior.

    Args:
        model: Display name from the UI dropdown (e.g. "Llama 3 8B").

    Returns:
        huggingface_hub.InferenceClient bound to the matching repo id.
    """
    repo_ids = {
        "Mixtral 8x7B": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "Llama 3 8B": "meta-llama/Meta-Llama-3-8B-Instruct",
        "Mistral 7B v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
        "Phi 3 mini": "microsoft/Phi-3-mini-4k-instruct",
    }
    # Fall back to Mixtral for any unrecognized name (old behavior).
    return InferenceClient(
        repo_ids.get(model, "mistralai/Mixtral-8x7B-Instruct-v0.1"))
|
|
def randomize_seed_fn(seed: int) -> int:
    """Draw and return a fresh random seed in [0, 999999].

    The incoming ``seed`` is deliberately ignored — every call produces a
    new random value (the caller passes the old seed only for convenience).
    """
    fresh_seed = random.randint(0, 999999)
    return fresh_seed
|
|
# System prompt prepended (in models()) to every user message; steers the
# LLM to answer as a concise Swahili assistant. The trailing "[USER]" marks
# where the user's text is appended.
system_instructions1 = """
[SYSTEM] Answer as a Swahili AI, made by 'Laocta Tech labs.'
Keep conversation friendly, short, clear, and concise.
[USER]
"""
|
|
def models(text, model="Mixtral 8x7B", seed=42):
    """Generate a reply to ``text`` via the selected hosted LLM.

    Args:
        text: User message (transcribed speech).
        model: Display name of the model to query (see client_fn).
        seed: Ignored in practice — randomize_seed_fn always draws a
            fresh random seed.

    Returns:
        The concatenated generated text, with the end-of-sequence token
        filtered out.
    """
    # randomize_seed_fn already returns an int; no torch.Generator is
    # needed here (the previous unused Generator was dead weight).
    seed = randomize_seed_fn(seed)
    client = client_fn(model)
    generate_kwargs = dict(
        max_new_tokens=300,
        seed=seed,
    )
    formatted_prompt = system_instructions1 + text + "[Sema]"
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True,
        return_full_text=False)
    output = ""
    for response in stream:
        # Skip the end-of-sequence marker emitted by the model.
        if response.token.text != "</s>":
            output += response.token.text
    return output
|
|
async def respond(audio, model, seed):
    """Full voice pipeline: speech -> text -> LLM reply -> synthesized WAV.

    Yields the path of a temporary WAV file holding the spoken reply.
    The file is created with delete=False so it outlives this coroutine
    and can be served by Gradio.
    """
    spoken_text = transcribe(audio)
    answer = models(spoken_text, model, seed)
    speech = edge_tts.Communicate(answer)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        wav_path = out_file.name
        await speech.save(wav_path)
    yield wav_path
|
|
| |
# Page styling injected into gr.Blocks(css=...): dark background, centered
# layout, green circular microphone button and pill-shaped "try" button.
# NOTE(review): `.container` is a CLASS selector — verify the layout element
# actually sets this class (elem_classes), not only an elem_id.
custom_css = """
body {
    background-color: #1a2b40;
    color: white;
    text-align: center;
}
.container {
    display: flex;
    justify-content: center;
    align-items: center;
    height: 90vh;
    flex-direction: column;
}
.microphone-button {
    width: 120px;
    height: 120px;
    background-color: #00d084;
    border-radius: 50%;
    display: flex;
    justify-content: center;
    align-items: center;
    margin-bottom: 20px;
    cursor: pointer;
    transition: background-color 0.3s ease;
}
.microphone-button:hover {
    background-color: #009f65;
}
.microphone-icon {
    font-size: 48px;
    color: white;
}
.try-button {
    background-color: #00d084;
    color: white;
    padding: 10px 20px;
    border-radius: 20px;
    font-size: 18px;
    cursor: pointer;
    border: none;
    transition: background-color 0.3s ease;
}
.try-button:hover {
    background-color: #009f65;
}
"""
|
|
# Markdown banner rendered at the top of the app page.
DESCRIPTION = """# <center><b>Sema-Ai⚡</b></center>
### <center>Build, test, and deploy voice agents quickly and easily</center>
"""
|
|
# UI layout: a single column with a microphone input, the synthesized-audio
# output, a model picker, a hidden seed slider, and a trigger button.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown(DESCRIPTION)
    # custom_css styles the class `.container`, so elem_classes is required
    # for the rules to apply; elem_id is kept for any external references.
    with gr.Row(elem_id="container", elem_classes="container"):
        with gr.Column():
            # Microphone recorder; `sources` is documented as a list in
            # Gradio 4.
            mic_btn = gr.Audio(
                label="Click the microphone",
                sources=["microphone"],
                type="filepath",
                elem_classes="microphone-button",
                show_label=False,
            )

            mic_icon = gr.Markdown("🎤", elem_classes="microphone-icon")

            output = gr.Audio(
                label="AI", type="filepath", interactive=False, autoplay=True)

            select = gr.Dropdown(
                ['Mixtral 8x7B', 'Llama 3 8B', 'Mistral 7B v0.3', 'Phi 3 mini'],
                value="Mistral 7B v0.3",
                label="Model",
            )

            # Hidden: the seed is re-randomized on every request anyway.
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=999999,
                step=1,
                value=0,
                visible=False,
            )

            try_btn = gr.Button("Give it a try!", elem_classes="try-button")

    # `live=True` is not a valid event-listener keyword (it belongs to
    # gr.Interface) and raises TypeError in Gradio 4 — removed.
    try_btn.click(fn=respond, inputs=[mic_btn, select, seed], outputs=[output])


if __name__ == "__main__":
    demo.queue(max_size=200).launch()
|
|