Spaces:

DKethan
/

Sage

Build error

App Files Files Community

Sage / app.py

DKethan

Update app.py

11866a6 verified about 1 year ago

raw

history blame contribute delete

4.22 kB

	import gradio as gr
	import edge_tts
	import asyncio
	import tempfile
	import os
	from huggingface_hub import InferenceClient
	import torch
	import random
	from streaming_stt_nemo import Model

	# Default language code, and the speech-to-text engines keyed by language code.
	# The Model for the default language is constructed here, at module level,
	# and transcribe() looks it up in `engines` instead of building a new one.
	default_lang = "en"
	engines = {default_lang: Model(default_lang)}

	# Function to transcribe audio to text
	def transcribe(audio):
	    """Convert an audio file to text with the default-language STT engine.

	    Args:
	        audio: Filesystem path of the audio file to transcribe.

	    Returns:
	        The first element of the result returned by the engine's ``stt_file``.

	    Raises:
	        ValueError: If ``audio`` is falsy (e.g. ``None`` or ``""``) or the
	            path does not exist.
	        RuntimeError: If the engine call fails; the original exception is
	            attached as ``__cause__``.
	    """
	    if not audio or not os.path.exists(audio):
	        raise ValueError("Invalid audio input: file does not exist or is None.")

	    model = engines[default_lang]

	    try:
	        return model.stt_file(audio)[0]
	    except Exception as e:
	        # Chain explicitly so the engine's own traceback is kept as the cause.
	        raise RuntimeError(f"Error during speech-to-text conversion: {e}") from e

	# Hugging Face Inference client function
	def client_fn(model):
	    """Build the inference client matching the model's display name.

	    The name is matched by substring, in the order listed below; any name
	    that matches none of the keywords falls back to the Phi-3 repository.

	    Args:
	        model: Display name of the model (e.g. a dropdown label).

	    Returns:
	        An ``InferenceClient`` bound to the selected repository.
	    """
	    fallback_repo = "microsoft/Phi-3-mini-4k-instruct"
	    # Order matters: the first keyword contained in `model` wins.
	    repo_by_keyword = (
	        ("Llama", "meta-llama/Meta-Llama-3-8B-Instruct"),
	        ("Mistral", "mistralai/Mistral-7B-Instruct-v0.2"),
	        ("Phi", "microsoft/Phi-3-mini-4k-instruct"),
	    )
	    for keyword, repo_id in repo_by_keyword:
	        if keyword in model:
	            return InferenceClient(repo_id)
	    return InferenceClient(fallback_repo)

	# Random seed generator
	def randomize_seed_fn(seed: int) -> int:
	    """Return a random integer seed between 0 and 999999 inclusive.

	    The ``seed`` argument is accepted but not used: the result is always
	    freshly drawn from ``random.randint``.
	    """
	    return random.randint(0, 999999)

	# Function to generate AI response using the selected model
	def models(text, model, seed=42):
	    """Ask the selected chat model to answer ``text`` and return the full reply.

	    Args:
	        text: The user's question; it is formatted into the user message.
	        model: Display name of the model, resolved to a client by ``client_fn``.
	        seed: Replaced by the value returned from ``randomize_seed_fn`` before
	            use; the resulting seed is sent with the chat request.

	    Returns:
	        The concatenation of every streamed content fragment (possibly ``""``).

	    Raises:
	        RuntimeError: If the streaming request fails; the original exception
	            is attached as ``__cause__``.
	    """
	    seed = int(randomize_seed_fn(seed))
	    client = client_fn(model)

	    prompt = [
	        {
	            "role": "system",
	            "content": (
	                "You are a personal assistant named 'Sage'. "
	                "You are asked the following question by the user. "
	                "Rules for the answer:\n"
	                "1. Respond in a normal conversational manner while being friendly and helpful.\n"
	                "2. Keep your response concise, ideally under 50 words.\n"
	                "3. Provide clear and direct answers to the user's question."
	            ),
	        },
	        {"role": "user", "content": f"{text}"},
	    ]

	    fragments = []
	    try:
	        # The seed is passed to the request itself; previously it only fed an
	        # unused torch generator and therefore never reached the model.
	        stream = client.chat_completion(
	            prompt, max_tokens=200, stream=True, seed=seed
	        )
	        for token in stream:
	            # Some stream events carry no choices; skip them.
	            if not token.choices:
	                continue
	            delta_content = token.choices[0].delta.content
	            if delta_content:
	                fragments.append(delta_content)
	    except Exception as e:
	        raise RuntimeError(f"Error during text generation: {e}") from e

	    return "".join(fragments)

	# Async function to handle the response generation and audio output
	async def respond(audio, model, seed):
	    """Transcribe a recording, generate a reply, and yield spoken audio for it.

	    This async generator yields exactly once: the path of a temporary file
	    containing the synthesized reply, or ``None`` when any step fails (the
	    error is printed instead of being raised).

	    Args:
	        audio: Path of the recorded audio file, passed to ``transcribe``.
	        model: Display name of the chat model, passed to ``models``.
	        seed: Seed value, passed to ``models``.

	    Yields:
	        The temporary file path on success, otherwise ``None``. On success the
	        file is created with ``delete=False`` and is not removed here.
	    """
	    tmp_path = None
	    try:
	        user = transcribe(audio)
	        reply = models(user, model, seed)
	        communicate = edge_tts.Communicate(reply)

	        # Only reserve a unique path here; the handle is closed before
	        # edge-tts writes to it, so the file is never open twice.
	        # NOTE(review): edge-tts is believed to produce MP3 data by default;
	        # confirm that the ".wav" suffix is intended.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
	            tmp_path = tmp_file.name
	        await communicate.save(tmp_path)
	    except Exception as e:
	        print(f"Error in respond function: {e}")
	        # With delete=False nothing else cleans up a file left by a failed run.
	        if tmp_path is not None:
	            try:
	                os.remove(tmp_path)
	            except OSError:
	                pass
	        yield None
	    else:
	        yield tmp_path

	# Markdown/HTML header text for the page: a centered bold title followed by
	# a centered tagline. It is passed to gr.Markdown() when the UI is built.
	DESCRIPTION = """ # <center><b>SAGE ⚡</b></center>
	### <center>Your personal assistant at your service!</center>
	"""

	# Gradio interface: builds the page (`demo`) and wires the components to respond().
	with gr.Blocks(css="style.css") as demo:
	# Page header rendered from the DESCRIPTION constant.
	gr.Markdown(DESCRIPTION)
	with gr.Row():
	# Model picker; its value is the second input of respond() (`model`).
	# NOTE(review): the first choice has a trailing space ('Llama 3 8B ');
	# client_fn() matches on substrings, so it still resolves to the Llama repo.
	select = gr.Dropdown(
	['Llama 3 8B ', 'Mistral 7B', 'Phi 3'],
	value="Phi 3",
	label="Model"
	)
	# Seed slider (0..999999), created with visible=False; its value is the
	# third input of respond() (`seed`).
	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=999999,
	step=1,
	value=0,
	visible=False
	)
	# Microphone recording, delivered to respond() as a file path (type="filepath").
	input_audio = gr.Audio(
	label="User",
	sources="microphone",
	type="filepath",
	waveform_options=False
	)
	# Non-interactive player for the generated reply, with autoplay enabled.
	output_audio = gr.Audio(
	label="AI",
	type="filepath",
	interactive=False,
	autoplay=True,
	elem_classes="audio"
	)
	# Connects respond() to the components defined above, with live=True.
	# NOTE(review): per the Gradio docs, batch=True means fn receives a list of
	# values for each input, while respond() handles single values — confirm
	# that batching (max_batch_size=10) is really intended here.
	gr.Interface(
	batch=True,
	max_batch_size=10,
	fn=respond,
	inputs=[input_audio, select, seed],
	outputs=[output_audio],
	live=True
	)

	# Entry point: enable the request queue (max_size=200), then launch the app.
	if __name__ == "__main__":
	    queued_app = demo.queue(max_size=200)
	    queued_app.launch()