import os
import asyncio
import tempfile
import traceback
import wave

import gradio as gr
from google import genai
from google.genai import types
from groq import Groq
# Branding constants for the assistant.
BOT_NAME = "Nilla"
MOTOR_NAME = "Nilla-2026 GPT motor"
PROVIDER = "HumanV lab"
# Runtime configuration from environment variables (all may be None if unset):
# POR is passed below as the Gemini system_instruction (the system prompt).
POR = os.environ.get("POR")
# MODEL_VERSION: Gemini model id used for the Live session.
MODEL_ID = os.environ.get("MODEL_VERSION")
# UK_SERVER_API: Google GenAI API key — name suggests a regional key/proxy; verify with deployer.
UK_SERVER_API = os.environ.get("UK_SERVER_API")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Gemini client pinned to the v1alpha API surface — presumably required for the
# Live-API features used in nilla_engine (affective dialog); confirm against docs.
client_gemini = genai.Client(
    api_key=UK_SERVER_API,
    http_options={"api_version": "v1alpha"}
)
# Groq client, used for Whisper speech-to-text transcription.
client_groq = Groq(api_key=GROQ_API_KEY)
async def nilla_engine(audio_path, text_input, chat_history):
    """Run one conversational turn: (optional) STT, then a Gemini Live exchange.

    Typed text takes precedence over recorded audio. Voice turns get an audio
    reply written to a temp WAV file; text turns get transcription text only.

    Args:
        audio_path: Filesystem path of recorded user audio, or None.
        text_input: Typed user text; if non-blank, audio is ignored.
        chat_history: Prior turns as Gemini content dicts, or None.

    Returns:
        Tuple ``(audio_out_path, user_text, model_text, new_history)`` where
        ``audio_out_path`` is a WAV path for voice turns, else None. On error,
        returns ``(None, user_text, "Error", chat_history)`` with history
        unchanged.
    """
    if chat_history is None:
        chat_history = []
    user_text = ""
    is_voice = False
    if text_input and text_input.strip():
        user_text = text_input
        is_voice = False
    elif audio_path:
        # Transcribe the recording with Whisper via Groq; the input file is
        # read once and closed before the (slow) Live call below.
        with open(audio_path, "rb") as file:
            transcription = client_groq.audio.transcriptions.create(
                file=(audio_path, file.read()),
                model="whisper-large-v3",
                temperature=0,
                response_format="verbose_json",
            )
        user_text = transcription.text
        is_voice = True
    else:
        # Nothing to process: neither text nor audio was supplied.
        return None, "", "", chat_history
    output_path = None
    model_response_text = ""
    current_turns = chat_history + [{"role": "user", "parts": [{"text": user_text}]}]
    config = {
        "response_modalities": ["AUDIO"],
        "system_instruction": POR,
        "enable_affective_dialog": True,
        "output_audio_transcription": {}
    }
    try:
        async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
            await session.send_client_content(turns=current_turns, turn_complete=True)
            if is_voice:
                # Reserve a temp WAV path; close the handle immediately so the
                # descriptor is not leaked (wave.open reopens the path itself).
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                    output_path = temp_file.name
                with wave.open(output_path, "wb") as wav:
                    wav.setnchannels(1)
                    wav.setsampwidth(2)  # 16-bit PCM samples
                    wav.setframerate(24000)
                    # Stream audio chunks into the WAV while accumulating the
                    # model's output transcription for the text display.
                    async for response in session.receive():
                        if response.data:
                            wav.writeframes(response.data)
                        if response.server_content and response.server_content.output_transcription:
                            model_response_text += response.server_content.output_transcription.text
            else:
                # Text turn: only collect the transcription of the reply.
                async for response in session.receive():
                    if response.server_content and response.server_content.output_transcription:
                        model_response_text += response.server_content.output_transcription.text
        new_history = current_turns + [{"role": "model", "parts": [{"text": model_response_text}]}]
        return output_path, user_text, model_response_text, new_history
    except Exception:
        # Keep the UI alive with a generic error and unchanged history, but
        # print the traceback instead of silently swallowing it.
        traceback.print_exc()
        return None, user_text, "Error", chat_history
def run_interface(audio_file, text_input, chat_history):
    """Synchronous Gradio entry point bridging into the async engine.

    Uses ``asyncio.run``, which creates a fresh event loop and — unlike the
    previous ``new_event_loop``/``run_until_complete`` pattern — closes it
    afterwards, so a loop is not leaked on every button click.
    """
    return asyncio.run(nilla_engine(audio_file, text_input, chat_history))
with gr.Blocks(title=BOT_NAME) as demo:
    # Hidden JSON component persisting conversation history between clicks.
    history_component = gr.JSON(value=[], visible=False)
    with gr.Row():
        # Inputs: either a recorded audio file path or typed text.
        in_audio = gr.Audio(label="Audio", type="filepath")
        in_text = gr.Textbox(label="Text")
    with gr.Row():
        # Outputs: voice reply (voice turns only), recognized user text, reply text.
        out_audio = gr.Audio(label="Voice Resp")
        out_user = gr.Textbox(label="User Text")
        out_nilla = gr.Textbox(label="Nilla Text")
    btn = gr.Button("Process")
    # One round trip per click: (audio|text, history) -> (voice, user text, reply, history).
    btn.click(
        fn=run_interface,
        inputs=[in_audio, in_text, history_component],
        outputs=[out_audio, out_user, out_nilla, history_component],
        api_name="run_interface"
    )
if __name__ == "__main__":
    # Launch the Gradio app when executed as a script. (A stray trailing "|"
    # scrape artifact after demo.launch() was removed — it was a syntax error.)
    demo.launch()