Spaces:

helvekami
/

ShukaNote

Running

helvekami committed on Mar 6, 2025

Commit

400fc00

1 Parent(s): 86fab4a

Updated Gradio App

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ def process_audio(audio):
         return "No audio provided. Please upload or record an audio file."
     try:
-        # Gradio returns a tuple: (sample_rate, numpy_array)
         sample_rate, audio_data = audio
     except Exception as e:
         return f"Error processing audio input: {e}"
@@ -28,7 +28,14 @@ def process_audio(audio):
     if audio_data is None or len(audio_data) == 0:
         return "Audio data is empty. Please try again with a valid audio file."
-    # Convert audio data to float if not already floating-point.
     if not np.issubdtype(audio_data.dtype, np.floating):
         audio_data = audio_data.astype(np.float32)
@@ -62,12 +69,12 @@ def process_audio(audio):
 # Create the Gradio interface.
 iface = gr.Interface(
     fn=process_audio,
-    inputs=gr.Audio(type="numpy"),  # File upload for audio.
     outputs="text",
     title="Sarvam AI Shuka Voice Demo",
     description="Upload an audio file and get a response using Sarvam AI's Shuka model."
 )
 if __name__ == "__main__":
-    # Set share=True to create a public link, and specify a server port.
     iface.launch(share=True, server_port=7861)

         return "No audio provided. Please upload or record an audio file."
     try:
+        # Gradio returns a tuple: (sample_rate, audio_data)
         sample_rate, audio_data = audio
     except Exception as e:
         return f"Error processing audio input: {e}"
     if audio_data is None or len(audio_data) == 0:
         return "Audio data is empty. Please try again with a valid audio file."
+    # Ensure audio_data is a numpy array.
+    audio_data = np.asarray(audio_data)
+    # If audio data is multi-dimensional, squeeze to 1D.
+    if audio_data.ndim > 1:
+        audio_data = np.squeeze(audio_data)
+    # Convert audio data to floating-point if it's not already.
     if not np.issubdtype(audio_data.dtype, np.floating):
         audio_data = audio_data.astype(np.float32)
 # Create the Gradio interface.
 iface = gr.Interface(
     fn=process_audio,
+    inputs=gr.Audio(type="numpy"),  # Using file upload for audio input.
     outputs="text",
     title="Sarvam AI Shuka Voice Demo",
     description="Upload an audio file and get a response using Sarvam AI's Shuka model."
 )
 if __name__ == "__main__":
+    # Set share=True to create a public link and use a non-default port.
     iface.launch(share=True, server_port=7861)