# HumanV / app.py
# Source: Hugging Face Space "HumanV" — uploaded by humanvprojectceo
# (commit 20c1db9, "Update app.py", verified)
import os
import asyncio
import wave
import gradio as gr
import tempfile
from groq import Groq
from google import genai
from google.genai import types
# Branding / display constants for the assistant.
BOT_NAME = "Nilla"
MOTOR_NAME = "Nilla-2026 GPT motor"
PROVIDER = "HumanV lab"

# Runtime configuration read from the environment; each may be None if unset.
# POR is passed verbatim as the Gemini system_instruction (see nilla_engine).
POR = os.environ.get("POR")
# MODEL_VERSION selects the Gemini live model used by client_gemini.
MODEL_ID = os.environ.get("MODEL_VERSION")
# UK_SERVER_API is the Google GenAI API key — TODO confirm naming in deployment.
UK_SERVER_API = os.environ.get("UK_SERVER_API")
# GROQ_API_KEY authenticates the Whisper transcription calls.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Module-level API clients, constructed at import time.
# NOTE(review): no validation that the keys above are actually set — failures
# would presumably surface on first API call; confirm desired behavior.
client_gemini = genai.Client(
    api_key=UK_SERVER_API,
    http_options={"api_version": "v1alpha"}
)
client_groq = Groq(api_key=GROQ_API_KEY)
async def nilla_engine(audio_path, text_input, chat_history):
    """Run one conversation turn against the Gemini live API.

    Typed text takes priority over audio; audio is first transcribed with
    Groq Whisper.  Voice input receives a spoken (WAV) reply plus a text
    transcript; text input receives a transcript only.

    Args:
        audio_path: Filesystem path to recorded user audio, or None.
        text_input: Typed user message; used when non-blank.
        chat_history: Prior turns as Gemini content dicts
            ({"role": ..., "parts": [{"text": ...}]}), or None.

    Returns:
        Tuple (wav_path_or_None, user_text, model_text, new_history).
        On any live-session failure, returns
        (None, user_text, "Error", chat_history) so the UI keeps its
        previous state.
    """
    if chat_history is None:
        chat_history = []

    user_text = ""
    is_voice = False
    if text_input and text_input.strip():
        user_text = text_input
        is_voice = False
    elif audio_path:
        # Transcribe the uploaded audio via Groq Whisper.
        with open(audio_path, "rb") as file:
            transcription = client_groq.audio.transcriptions.create(
                file=(audio_path, file.read()),
                model="whisper-large-v3",
                temperature=0,
                response_format="verbose_json",
            )
        user_text = transcription.text
        is_voice = True
    else:
        # No input at all: empty outputs, history unchanged.
        return None, "", "", chat_history

    output_path = None
    model_response_text = ""
    current_turns = chat_history + [{"role": "user", "parts": [{"text": user_text}]}]
    config = {
        "response_modalities": ["AUDIO"],
        "system_instruction": POR,
        "enable_affective_dialog": True,
        "output_audio_transcription": {},
    }
    try:
        async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
            await session.send_client_content(turns=current_turns, turn_complete=True)
            if is_voice:
                # Reserve a persistent temp .wav path, then close the handle
                # immediately: wave.open reopens the path itself, and the
                # original code leaked one file descriptor per voice turn.
                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
                output_path = temp_file.name
                temp_file.close()
                with wave.open(output_path, "wb") as wav:
                    # 16-bit mono PCM at 24 kHz (matches the audio the live
                    # session streams back).
                    wav.setnchannels(1)
                    wav.setsampwidth(2)
                    wav.setframerate(24000)
                    async for response in session.receive():
                        if response.data:
                            wav.writeframes(response.data)
                        if response.server_content and response.server_content.output_transcription:
                            model_response_text += response.server_content.output_transcription.text
            else:
                # Text-only turn: accumulate just the output transcription.
                async for response in session.receive():
                    if response.server_content and response.server_content.output_transcription:
                        model_response_text += response.server_content.output_transcription.text
        new_history = current_turns + [{"role": "model", "parts": [{"text": model_response_text}]}]
        return output_path, user_text, model_response_text, new_history
    except Exception:
        # Deliberate best-effort: any live-session failure degrades to an
        # "Error" transcript instead of crashing the UI.
        # NOTE(review): consider logging the exception for debuggability.
        return None, user_text, "Error", chat_history
def run_interface(audio_file, text_input, chat_history):
    """Synchronous Gradio entry point: drive nilla_engine to completion.

    Uses asyncio.run, which creates a fresh event loop for the call and —
    unlike the previous new_event_loop()/run_until_complete pattern —
    closes it afterwards, so no loop is leaked per request.

    Args:
        audio_file: Path to recorded audio from the Audio component, or None.
        text_input: Typed text from the Textbox component.
        chat_history: Hidden-state chat history (list of turn dicts) or None.

    Returns:
        The 4-tuple produced by nilla_engine:
        (audio_path_or_None, user_text, model_text, new_history).
    """
    return asyncio.run(nilla_engine(audio_file, text_input, chat_history))
# --- Gradio UI definition (module-level side effect: builds the app) ---
with gr.Blocks(title=BOT_NAME) as demo:
    # Hidden JSON component that persists the Gemini-format chat history
    # (list of {"role", "parts"} dicts) across button clicks.
    history_component = gr.JSON(value=[], visible=False)
    with gr.Row():
        in_audio = gr.Audio(label="Audio", type="filepath")
        in_text = gr.Textbox(label="Text")
    with gr.Row():
        out_audio = gr.Audio(label="Voice Resp")
        out_user = gr.Textbox(label="User Text")
        out_nilla = gr.Textbox(label="Nilla Text")
    btn = gr.Button("Process")
    # Wire the button: run_interface returns
    # (audio_path, user_text, model_text, new_history); note that
    # history_component appears as both input and output so history round-trips.
    btn.click(
        fn=run_interface,
        inputs=[in_audio, in_text, history_component],
        outputs=[out_audio, out_user, out_nilla, history_component],
        api_name="run_interface"
    )

if __name__ == "__main__":
    # Launch the Gradio server when run as a script.
    demo.launch()