humanvprojectceo committed on
Commit
606324d
·
verified ·
1 Parent(s): 722902c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -15
app.py CHANGED
@@ -15,12 +15,18 @@ MODEL_ID = os.environ.get("MODEL_VERSION")
15
  UK_SERVER_API = os.environ.get("UK_SERVER_API")
16
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
17
 
18
- client_gemini = genai.Client(api_key=UK_SERVER_API)
 
 
 
19
  client_groq = Groq(api_key=GROQ_API_KEY)
20
 
21
- async def nilla_engine(audio_path):
22
  if audio_path is None:
23
- return None, "No audio provided"
 
 
 
24
 
25
  with open(audio_path, "rb") as file:
26
  transcription = client_groq.audio.transcriptions.create(
@@ -37,13 +43,18 @@ async def nilla_engine(audio_path):
37
 
38
  config = {
39
  "response_modalities": ["AUDIO"],
40
- "system_instruction": POR
 
 
41
  }
42
 
 
 
 
43
  try:
44
  async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
45
  await session.send_client_content(
46
- turns={"role": "user", "parts": [{"text": user_text}]},
47
  turn_complete=True
48
  )
49
 
@@ -52,36 +63,45 @@ async def nilla_engine(audio_path):
52
  wav.setsampwidth(2)
53
  wav.setframerate(24000)
54
 
55
- turn = session.receive()
56
- async for response in turn:
57
  if response.data is not None:
58
  wav.writeframes(response.data)
 
 
 
 
 
59
 
60
- return output_path, user_text
61
  except Exception:
62
- return None, user_text
63
 
64
- def run_interface(audio_file):
65
  loop = asyncio.new_event_loop()
66
  asyncio.set_event_loop(loop)
67
- return loop.run_until_complete(nilla_engine(audio_file))
68
 
69
  with gr.Blocks(title=BOT_NAME) as demo:
70
  gr.Markdown(f"# {BOT_NAME}")
71
  gr.Markdown(f"{MOTOR_NAME} | {PROVIDER}")
72
 
 
 
73
  with gr.Row():
74
  input_audio = gr.Audio(label="Speak to Nilla", type="filepath")
75
 
76
  with gr.Row():
77
- output_text = gr.Textbox(label="Transcribed Text (Groq)")
78
- output_audio = gr.Audio(label="Nilla's Response")
 
 
 
79
 
80
  submit_btn = gr.Button("Process")
81
  submit_btn.click(
82
  fn=run_interface,
83
- inputs=input_audio,
84
- outputs=[output_audio, output_text]
85
  )
86
 
87
  if __name__ == "__main__":
 
15
  UK_SERVER_API = os.environ.get("UK_SERVER_API")
16
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
17
 
18
+ client_gemini = genai.Client(
19
+ api_key=UK_SERVER_API,
20
+ http_options={"api_version": "v1alpha"}
21
+ )
22
  client_groq = Groq(api_key=GROQ_API_KEY)
23
 
24
+ async def nilla_engine(audio_path, chat_history):
25
  if audio_path is None:
26
+ return None, "", "", chat_history
27
+
28
+ if chat_history is None:
29
+ chat_history = []
30
 
31
  with open(audio_path, "rb") as file:
32
  transcription = client_groq.audio.transcriptions.create(
 
43
 
44
  config = {
45
  "response_modalities": ["AUDIO"],
46
+ "system_instruction": POR,
47
+ "enable_affective_dialog": True,
48
+ "output_audio_transcription": {}
49
  }
50
 
51
+ current_turns = chat_history + [{"role": "user", "parts": [{"text": user_text}]}]
52
+ model_response_text = ""
53
+
54
  try:
55
  async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
56
  await session.send_client_content(
57
+ turns=current_turns,
58
  turn_complete=True
59
  )
60
 
 
63
  wav.setsampwidth(2)
64
  wav.setframerate(24000)
65
 
66
+ async for response in session.receive():
 
67
  if response.data is not None:
68
  wav.writeframes(response.data)
69
+
70
+ if response.server_content and response.server_content.output_transcription:
71
+ model_response_text += response.server_content.output_transcription.text
72
+
73
+ new_history = current_turns + [{"role": "model", "parts": [{"text": model_response_text}]}]
74
 
75
+ return output_path, user_text, model_response_text, new_history
76
  except Exception:
77
+ return None, user_text, "Error processing", chat_history
78
 
79
def run_interface(audio_file, chat_history):
    """Synchronous Gradio callback that drives the async ``nilla_engine``.

    Gradio's ``click`` handler runs in a plain (synchronous) worker thread,
    so the coroutine is executed to completion on a dedicated event loop.

    Args:
        audio_file: Filepath of the recorded input audio, or ``None`` when
            nothing was recorded.
        chat_history: Accumulated conversation turns held in ``gr.State``.

    Returns:
        Whatever ``nilla_engine`` returns — the 4-tuple
        ``(output_audio_path, user_text, model_response_text, new_history)``.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(nilla_engine(audio_file, chat_history))
    finally:
        # Fix: the original never closed the loop, leaking one event loop
        # (and its selector file descriptor) per button click. Also clear
        # the thread's event-loop slot so it doesn't point at a closed loop.
        asyncio.set_event_loop(None)
        loop.close()
83
 
84
  with gr.Blocks(title=BOT_NAME) as demo:
85
  gr.Markdown(f"# {BOT_NAME}")
86
  gr.Markdown(f"{MOTOR_NAME} | {PROVIDER}")
87
 
88
+ history_state = gr.State([])
89
+
90
  with gr.Row():
91
  input_audio = gr.Audio(label="Speak to Nilla", type="filepath")
92
 
93
  with gr.Row():
94
+ output_text_user = gr.Textbox(label="You said (Groq)")
95
+ output_text_nilla = gr.Textbox(label="Nilla said (Transcript)")
96
+
97
+ with gr.Row():
98
+ output_audio = gr.Audio(label="Nilla's Voice Response")
99
 
100
  submit_btn = gr.Button("Process")
101
  submit_btn.click(
102
  fn=run_interface,
103
+ inputs=[input_audio, history_state],
104
+ outputs=[output_audio, output_text_user, output_text_nilla, history_state]
105
  )
106
 
107
  if __name__ == "__main__":