humanvprojectceo committed on
Commit
db61141
·
verified ·
1 Parent(s): 334d7fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -20
app.py CHANGED
@@ -2,22 +2,38 @@ import os
2
  import asyncio
3
  import wave
4
  import gradio as gr
 
 
5
  from google import genai
6
  from google.genai import types
7
- import tempfile
8
 
9
  BOT_NAME = "Nilla"
10
  MOTOR_NAME = "Nilla-2026 GPT motor"
11
  PROVIDER = "HumanV lab"
12
- SYSTEM_INSTRUCTION = f"Your name is {BOT_NAME} with {MOTOR_NAME} and providing by {PROVIDER}."
13
  MODEL_ID = os.environ.get("MODEL_VERSION")
14
  UK_SERVER_API = os.environ.get("UK_SERVER_API")
 
 
 
 
15
 
16
- client = genai.Client(api_key=UK_SERVER_API)
 
 
17
 
18
- async def nilla_voice_engine(text):
19
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
20
- file_path = temp_file.name
 
 
 
 
 
 
 
 
 
21
 
22
  config = {
23
  "response_modalities": ["AUDIO"],
@@ -25,13 +41,13 @@ async def nilla_voice_engine(text):
25
  }
26
 
27
  try:
28
- async with client.aio.live.connect(model=MODEL_ID, config=config) as session:
29
  await session.send_client_content(
30
- turns={"role": "user", "parts": [{"text": text}]},
31
  turn_complete=True
32
  )
33
 
34
- with wave.open(file_path, "wb") as wav:
35
  wav.setnchannels(1)
36
  wav.setsampwidth(2)
37
  wav.setframerate(24000)
@@ -41,26 +57,32 @@ async def nilla_voice_engine(text):
41
  if response.data is not None:
42
  wav.writeframes(response.data)
43
 
44
- return file_path
45
- except Exception as e:
46
- return None
47
 
48
- def run_api(text):
49
- if not text:
50
- return None
51
  loop = asyncio.new_event_loop()
52
  asyncio.set_event_loop(loop)
53
- return loop.run_until_complete(nilla_voice_engine(text))
54
 
55
  with gr.Blocks(title=BOT_NAME) as demo:
56
  gr.Markdown(f"# {BOT_NAME}")
57
  gr.Markdown(f"{MOTOR_NAME} | {PROVIDER}")
58
 
59
- input_text = gr.Textbox(label="Input", placeholder="...")
60
- output_audio = gr.Audio(label="Response", type="filepath")
61
- submit_btn = gr.Button("Generate")
 
 
 
62
 
63
- submit_btn.click(fn=run_api, inputs=input_text, outputs=output_audio)
 
 
 
 
 
64
 
65
  if __name__ == "__main__":
66
  demo.launch()
 
2
  import asyncio
3
  import wave
4
  import gradio as gr
5
+ import tempfile
6
+ from groq import Groq
7
  from google import genai
8
  from google.genai import types
 
9
 
10
# Branding strings surfaced in the Gradio UI.
BOT_NAME = "Nilla"
MOTOR_NAME = "Nilla-2026 GPT motor"
PROVIDER = "HumanV lab"
# NOTE(review): PATH is read here but never used anywhere below — presumably a
# leftover from an earlier revision; confirm with the author before removing.
PATH = os.environ.get("PATH")
# Deployment configuration injected via environment variables (e.g. Space secrets).
MODEL_ID = os.environ.get("MODEL_VERSION")       # Gemini Live model identifier
UK_SERVER_API = os.environ.get("UK_SERVER_API")  # Gemini API key
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")    # Groq API key (Whisper transcription)

# Module-level API clients shared by all requests:
# Gemini handles audio generation; Groq handles speech-to-text.
client_gemini = genai.Client(api_key=UK_SERVER_API)
client_groq = Groq(api_key=GROQ_API_KEY)
20
 
21
+ async def nilla_engine(audio_path):
22
+ if audio_path is None:
23
+ return None, "No audio provided"
24
 
25
+ with open(audio_path, "rb") as file:
26
+ transcription = client_groq.audio.transcriptions.create(
27
+ file=(audio_path, file.read()),
28
+ model="whisper-large-v3",
29
+ temperature=0,
30
+ response_format="verbose_json",
31
+ )
32
+
33
+ user_text = transcription.text
34
+
35
+ output_temp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
36
+ output_path = output_temp.name
37
 
38
  config = {
39
  "response_modalities": ["AUDIO"],
 
41
  }
42
 
43
  try:
44
+ async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
45
  await session.send_client_content(
46
+ turns={"role": "user", "parts": [{"text": user_text}]},
47
  turn_complete=True
48
  )
49
 
50
+ with wave.open(output_path, "wb") as wav:
51
  wav.setnchannels(1)
52
  wav.setsampwidth(2)
53
  wav.setframerate(24000)
 
57
  if response.data is not None:
58
  wav.writeframes(response.data)
59
 
60
+ return output_path, user_text
61
+ except Exception:
62
+ return None, user_text
63
 
64
def run_interface(audio_file):
    """Synchronous Gradio callback that drives the async voice pipeline.

    Parameters
    ----------
    audio_file : str | None
        Filepath to the recorded input audio as provided by ``gr.Audio``.

    Returns
    -------
    tuple
        Whatever ``nilla_engine`` produces: (output audio filepath or None,
        transcribed/user text).
    """
    # Gradio callbacks run in a plain worker thread, so we need our own loop.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(nilla_engine(audio_file))
    finally:
        # Bug fix: the original never closed the loop, leaking one event loop
        # (and its selector file descriptor) per button click.
        loop.close()
68
 
69
with gr.Blocks(title=BOT_NAME) as demo:
    # Page header: bot name, then engine / provider byline.
    gr.Markdown(f"# {BOT_NAME}")
    gr.Markdown(f"{MOTOR_NAME} | {PROVIDER}")

    # Input row: microphone/file capture, delivered to the engine as a filepath.
    with gr.Row():
        input_audio = gr.Audio(label="Speak to Nilla", type="filepath")

    # Output row: the Whisper transcription alongside the synthesized reply.
    with gr.Row():
        output_text = gr.Textbox(label="Transcribed Text (Groq)")
        output_audio = gr.Audio(label="Nilla's Response")

    submit_btn = gr.Button("Process")
    # run_interface returns (audio_path, text); outputs are wired in that order.
    submit_btn.click(fn=run_interface, inputs=input_audio, outputs=[output_audio, output_text])

if __name__ == "__main__":
    demo.launch()