humanvprojectceo committed on
Commit
606324d
·
verified ·
1 Parent(s): 722902c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -15
app.py CHANGED
@@ -15,12 +15,18 @@ MODEL_ID = os.environ.get("MODEL_VERSION")
15
  UK_SERVER_API = os.environ.get("UK_SERVER_API")
16
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
17
 
18
- client_gemini = genai.Client(api_key=UK_SERVER_API)
 
 
 
19
  client_groq = Groq(api_key=GROQ_API_KEY)
20
 
21
- async def nilla_engine(audio_path):
22
  if audio_path is None:
23
- return None, "No audio provided"
 
 
 
24
 
25
  with open(audio_path, "rb") as file:
26
  transcription = client_groq.audio.transcriptions.create(
@@ -37,13 +43,18 @@ async def nilla_engine(audio_path):
37
 
38
  config = {
39
  "response_modalities": ["AUDIO"],
40
- "system_instruction": POR
 
 
41
  }
42
 
 
 
 
43
  try:
44
  async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
45
  await session.send_client_content(
46
- turns={"role": "user", "parts": [{"text": user_text}]},
47
  turn_complete=True
48
  )
49
 
@@ -52,36 +63,45 @@ async def nilla_engine(audio_path):
52
  wav.setsampwidth(2)
53
  wav.setframerate(24000)
54
 
55
- turn = session.receive()
56
- async for response in turn:
57
  if response.data is not None:
58
  wav.writeframes(response.data)
 
 
 
 
 
59
 
60
- return output_path, user_text
61
  except Exception:
62
- return None, user_text
63
 
64
- def run_interface(audio_file):
65
  loop = asyncio.new_event_loop()
66
  asyncio.set_event_loop(loop)
67
- return loop.run_until_complete(nilla_engine(audio_file))
68
 
69
  with gr.Blocks(title=BOT_NAME) as demo:
70
  gr.Markdown(f"# {BOT_NAME}")
71
  gr.Markdown(f"{MOTOR_NAME} | {PROVIDER}")
72
 
 
 
73
  with gr.Row():
74
  input_audio = gr.Audio(label="Speak to Nilla", type="filepath")
75
 
76
  with gr.Row():
77
- output_text = gr.Textbox(label="Transcribed Text (Groq)")
78
- output_audio = gr.Audio(label="Nilla's Response")
 
 
 
79
 
80
  submit_btn = gr.Button("Process")
81
  submit_btn.click(
82
  fn=run_interface,
83
- inputs=input_audio,
84
- outputs=[output_audio, output_text]
85
  )
86
 
87
  if __name__ == "__main__":
 
15
  UK_SERVER_API = os.environ.get("UK_SERVER_API")
16
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
17
 
18
+ client_gemini = genai.Client(
19
+ api_key=UK_SERVER_API,
20
+ http_options={"api_version": "v1alpha"}
21
+ )
22
  client_groq = Groq(api_key=GROQ_API_KEY)
23
 
24
+ async def nilla_engine(audio_path, chat_history):
25
  if audio_path is None:
26
+ return None, "", "", chat_history
27
+
28
+ if chat_history is None:
29
+ chat_history = []
30
 
31
  with open(audio_path, "rb") as file:
32
  transcription = client_groq.audio.transcriptions.create(
 
43
 
44
  config = {
45
  "response_modalities": ["AUDIO"],
46
+ "system_instruction": POR,
47
+ "enable_affective_dialog": True,
48
+ "output_audio_transcription": {}
49
  }
50
 
51
+ current_turns = chat_history + [{"role": "user", "parts": [{"text": user_text}]}]
52
+ model_response_text = ""
53
+
54
  try:
55
  async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
56
  await session.send_client_content(
57
+ turns=current_turns,
58
  turn_complete=True
59
  )
60
 
 
63
  wav.setsampwidth(2)
64
  wav.setframerate(24000)
65
 
66
+ async for response in session.receive():
 
67
  if response.data is not None:
68
  wav.writeframes(response.data)
69
+
70
+ if response.server_content and response.server_content.output_transcription:
71
+ model_response_text += response.server_content.output_transcription.text
72
+
73
+ new_history = current_turns + [{"role": "model", "parts": [{"text": model_response_text}]}]
74
 
75
+ return output_path, user_text, model_response_text, new_history
76
  except Exception:
77
+ return None, user_text, "Error processing", chat_history
78
 
79
def run_interface(audio_file, chat_history):
    """Synchronous Gradio callback that drives the async ``nilla_engine``.

    Gradio's ``click`` handler runs in a plain (synchronous) worker thread,
    so the coroutine is executed to completion on a dedicated event loop.

    Args:
        audio_file: Filepath of the recorded input audio, or ``None`` when
            nothing was recorded.
        chat_history: Accumulated conversation turns held in ``gr.State``.

    Returns:
        Whatever ``nilla_engine`` returns — the 4-tuple
        ``(output_audio_path, user_text, model_response_text, new_history)``.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(nilla_engine(audio_file, chat_history))
    finally:
        # Fix: the original never closed the loop, leaking one event loop
        # (and its selector file descriptor) per button click. Also clear
        # the thread's event-loop slot so it doesn't point at a closed loop.
        asyncio.set_event_loop(None)
        loop.close()
83
 
84
  with gr.Blocks(title=BOT_NAME) as demo:
85
  gr.Markdown(f"# {BOT_NAME}")
86
  gr.Markdown(f"{MOTOR_NAME} | {PROVIDER}")
87
 
88
+ history_state = gr.State([])
89
+
90
  with gr.Row():
91
  input_audio = gr.Audio(label="Speak to Nilla", type="filepath")
92
 
93
  with gr.Row():
94
+ output_text_user = gr.Textbox(label="You said (Groq)")
95
+ output_text_nilla = gr.Textbox(label="Nilla said (Transcript)")
96
+
97
+ with gr.Row():
98
+ output_audio = gr.Audio(label="Nilla's Voice Response")
99
 
100
  submit_btn = gr.Button("Process")
101
  submit_btn.click(
102
  fn=run_interface,
103
+ inputs=[input_audio, history_state],
104
+ outputs=[output_audio, output_text_user, output_text_nilla, history_state]
105
  )
106
 
107
  if __name__ == "__main__":