omni-audio

Build error

App Files Files Community

PerryCheng614 committed on Oct 30, 2024

Commit

1bccd9f

1 Parent(s): ff9e518

update gradio UI for slider and file system

Browse files

Files changed (1) hide show

app.py +37 -46

app.py CHANGED Viewed

@@ -5,54 +5,43 @@ import os
 # FastAPI endpoint
 API_URL = "https://nexa-omni.nexa4ai.com/process-audio/"
-def process_audio(audio_path, prompt=""):
     """
     Send audio file to FastAPI backend for processing
     """
-    # Clear any previous audio files before processing new one
-    clear_previous_audio_files(audio_path)
     try:
-        # Prepare the file for upload
-        files = {
-            'file': ('audio.wav', open(audio_path, 'rb'), 'audio/wav')
-        }
-        # Send prompt as form data
-        data = {'prompt': prompt}
-        # Make the request to FastAPI
-        response = requests.post(API_URL, files=files, data=data)
-        response.raise_for_status()
-        return response.json()['response']
     except Exception as e:
         return f"Error processing audio: {str(e)}"
-def clear_previous_audio_files(current_file):
-    """
-    Clear previous audio files in the same directory as the current file,
-    except for the current file and example files
-    """
-    if not current_file:
-        return
-    directory = os.path.dirname(current_file)
-    if not directory:
-        directory = "."
-    for file in os.listdir(directory):
-        file_path = os.path.join(directory, file)
-        # Skip if it's the current file, example files, or not a file
-        if (file_path == current_file or
-            'example' in file_path or
-            not os.path.isfile(file_path) or
-            not file.endswith(('.wav', '.mp3'))):
-            continue
-        try:
-            os.remove(file_path)
-        except:
-            pass  # Silently ignore deletion errors
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_audio,
@@ -62,21 +51,23 @@ demo = gr.Interface(
             label="Upload or Record Audio",
             sources=["upload", "microphone"]
         ),
-        gr.Textbox(
-            placeholder="Enter prompt (optional)",
-            label="Prompt",
-            value="Summarize this audio in English."
         )
     ],
     outputs=gr.Textbox(label="Response"),
     title="Nexa Omni",
     description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
     examples=[
-        ["example_audios/example_1.wav", "transcribe this audio in English"],
     ]
 )
-def clear_output(audio, prompt):
     return ""
 demo.load_examples = clear_output

 # FastAPI endpoint
 API_URL = "https://nexa-omni.nexa4ai.com/process-audio/"
# Path of the most recent audio file that was found on disk. It is reused when
# a later call arrives without a usable upload.
last_valid_audio = None

# Upper bound (seconds) on the backend call so a stalled request cannot block
# the UI callback indefinitely.
REQUEST_TIMEOUT_SECONDS = 120


def process_audio(audio_path, max_tokens):
    """
    Send audio file to FastAPI backend for processing

    Args:
        audio_path: Path of the uploaded or recorded audio file. May be empty
            or None, in which case the last valid audio path is reused.
        max_tokens: Value forwarded to the backend as the ``max_tokens``
            form field.

    Returns:
        The ``response`` field of the backend's JSON reply, or a
        human-readable message when no usable audio is available or the
        request fails.
    """
    global last_valid_audio

    if audio_path and os.path.exists(audio_path):
        # New audio uploaded/recorded: remember it for subsequent calls.
        last_valid_audio = audio_path
    elif not audio_path and not last_valid_audio:
        # Nothing supplied now and nothing remembered from earlier calls.
        return "Please upload or record an audio file first."

    # Prefer the remembered file; fall back to whatever was passed in.
    current_audio = last_valid_audio if last_valid_audio else audio_path

    try:
        if not (current_audio and os.path.exists(current_audio)):
            return "No valid audio file available."

        # The context manager guarantees the handle is closed even when the
        # upload raises; the original left the file open on every call.
        with open(current_audio, 'rb') as audio_file:
            files = {'file': ('audio.wav', audio_file, 'audio/wav')}
            data = {'max_tokens': max_tokens}
            response = requests.post(
                API_URL,
                files=files,
                data=data,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        response.raise_for_status()
        return response.json()['response']
    except Exception as e:
        # Any failure is reported to the UI as text rather than raised.
        return f"Error processing audio: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_audio,
             label="Upload or Record Audio",
             sources=["upload", "microphone"]
         ),
+        gr.Slider(
+            minimum=50,
+            maximum=200,
+            value=50,
+            step=1,
+            label="Max Tokens"
         )
     ],
     outputs=gr.Textbox(label="Response"),
     title="Nexa Omni",
     description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
     examples=[
+        ["example_audios/example_1.wav", 50],
     ]
 )
+def clear_output(audio, max_tokens):
     # Both arguments are ignored; the result is always an empty string.
     return ""
 demo.load_examples = clear_output