Spaces:

BACKENDAPI2024
/

radarbackend11262024v11

Runtime error

App Files Community

Pijush2023 committed on Oct 25, 2024

Commit

3595ee8

verified ·

1 Parent(s): ebe2251

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -4

app.py CHANGED Viewed

@@ -58,21 +58,31 @@ pipe_asr = pipeline(
     return_timestamps=True
 )
-# Function to determine if a pause occurred
 def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
-    """Take in the stream, determine if a pause happened"""
     temp_audio = audio
     dur_vad = len(temp_audio) / sampling_rate  # Simulating VAD duration for this example
     duration = len(audio) / sampling_rate
     if dur_vad > 0.5 and not state.started_talking:
         print("Started talking")
         state.started_talking = True
         return False
-    print(f"Duration after VAD: {dur_vad:.3f} s")
-    return (duration - dur_vad) > 1  # Adjust the threshold for pause duration as needed
 # Function to process audio input, detect pauses, and handle state
 def process_audio(audio: tuple, state: AppState):
@@ -96,10 +106,20 @@ def process_audio(audio: tuple, state: AppState):
         _, transcription, _ = transcribe_function(state.stream, (state.sampling_rate, state.stream))
         print(f"Transcription: {transcription}")
         # Retrieve hybrid response using Neo4j and other methods
         response_text = retriever(transcription)
         print(f"Response: {response_text}")
         # Generate audio from the response text
         audio_path = generate_audio_elevenlabs(response_text)
@@ -112,6 +132,7 @@ def process_audio(audio: tuple, state: AppState):
     return None, state
 # Function to process audio input and transcribe it
 def transcribe_function(stream, new_chunk):
     try:

     return_timestamps=True
 )
+# Adjusted function to determine if a pause occurred
 def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
+    """Take in the stream, determine if a pause happened."""
     temp_audio = audio
     dur_vad = len(temp_audio) / sampling_rate  # Simulating VAD duration for this example
     duration = len(audio) / sampling_rate
+    # Log the duration and VAD result for debugging
+    print(f"Duration after VAD: {dur_vad:.3f} s, Total Duration: {duration:.3f} s")
+    # Check if speech has started
     if dur_vad > 0.5 and not state.started_talking:
         print("Started talking")
         state.started_talking = True
         return False
+    # If the difference between total duration and VAD duration is significant, consider it a pause
+    # Adjust the threshold for pause detection (e.g., 0.5 seconds)
+    pause_threshold = 0.5  # This value can be adjusted to be more sensitive
+    if (duration - dur_vad) > pause_threshold and state.started_talking:
+        print("Pause detected")
+        return True
+    return False
 # Function to process audio input, detect pauses, and handle state
 def process_audio(audio: tuple, state: AppState):
         _, transcription, _ = transcribe_function(state.stream, (state.sampling_rate, state.stream))
         print(f"Transcription: {transcription}")
+        # Check if transcription is empty
+        if not transcription:
+            print("No transcription available.")
+            return None, state
         # Retrieve hybrid response using Neo4j and other methods
         response_text = retriever(transcription)
         print(f"Response: {response_text}")
+        # Check if the response is empty before proceeding
+        if not response_text:
+            print("No response generated.")
+            return None, state
         # Generate audio from the response text
         audio_path = generate_audio_elevenlabs(response_text)
     return None, state
 # Function to process audio input and transcribe it
 def transcribe_function(stream, new_chunk):
     try: