Update app.py
app.py CHANGED
@@ -11,42 +11,15 @@ from langchain_community.graphs import Neo4jGraph
 from langchain_core.prompts import ChatPromptTemplate
 import time
 import os
-import
-from pydub import AudioSegment
-from dataclasses import dataclass,field
-import numpy as np
-
+from dataclasses import dataclass
 
-# Define AppState
+# Define AppState to store audio state information
 @dataclass
 class AppState:
     stream: np.ndarray | None = None
     sampling_rate: int = 0
     pause_detected: bool = False
-
-    conversation: list = field(default_factory=list)
-    #conversation: list = []
-
-
-
-
-def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
-    """Take in the stream, determine if a pause happened"""
-
-    temp_audio = audio
-
-    dur_vad, _, time_vad = run_vad(temp_audio, sampling_rate)
-    duration = len(audio) / sampling_rate
-
-    if dur_vad > 0.5 and not state.started_talking:
-        print("started talking")
-        state.started_talking = True
-        return False
-
-    print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
-
-    return (duration - dur_vad) > 1
-
+    started_talking: bool = False
 
 # Neo4j setup
 graph = Neo4jGraph(
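
Note on this hunk: the old determine_pause read state.started_talking, but the old AppState never declared that field, so the first call would have raised AttributeError; the new started_talking field fixes that. The hunk also deletes import numpy as np even though AppState still annotates stream as np.ndarray | None. Unless numpy is already imported somewhere above line 11 (not shown in this diff), the new file raises NameError at import time. A minimal sketch of the state block with the import kept; the inline comments are editorial assumptions, not part of the commit:

import numpy as np  # still needed: the annotation below references np.ndarray
from dataclasses import dataclass

@dataclass
class AppState:
    stream: np.ndarray | None = None   # accumulated microphone samples
    sampling_rate: int = 0             # expected to be filled from the first chunk
    pause_detected: bool = False
    started_talking: bool = False      # must exist with a default, since determine_pause
                                       # reads it before it is ever assigned
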
@@ -85,12 +58,23 @@ pipe_asr = pipeline(
     return_timestamps=True
 )
 
-# Function to
-def
-
-
+# Function to determine if a pause occurred
+def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
+    """Take in the stream, determine if a pause happened"""
+    temp_audio = audio
+    dur_vad = len(temp_audio) / sampling_rate  # Simulating VAD duration for this example
+    duration = len(audio) / sampling_rate
 
-
+    if dur_vad > 0.5 and not state.started_talking:
+        print("Started talking")
+        state.started_talking = True
+        return False
+
+    print(f"Duration after VAD: {dur_vad:.3f} s")
+
+    return (duration - dur_vad) > 1  # Adjust the threshold for pause duration as needed
+
+# Function to process audio input, detect pauses, and handle state
 def process_audio(audio: tuple, state: AppState):
     if state.stream is None:
         state.stream = audio[1]
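
The rewritten determine_pause replaces the old run_vad call with dur_vad = len(temp_audio) / sampling_rate, which makes dur_vad equal to duration by construction; duration - dur_vad is then always 0.0, so the (duration - dur_vad) > 1 test can never succeed and a pause is never reported. A sketch of an energy-based stand-in that restores a meaningful voiced-duration estimate; the 20 ms frame size and 0.01 RMS threshold are illustrative assumptions, not values from the commit:

import numpy as np

def voiced_duration(audio: np.ndarray, sampling_rate: int, rms_threshold: float = 0.01) -> float:
    """Seconds of audio whose frame-level RMS exceeds the threshold.

    Assumes samples normalized to [-1, 1]; int16 input (Gradio's default)
    should be divided by 32768.0 first.
    """
    frame = max(1, int(0.02 * sampling_rate))  # 20 ms frames
    n = len(audio) // frame
    if n == 0:
        return 0.0
    frames = audio[: n * frame].astype(np.float32).reshape(n, frame)
    rms = np.sqrt((frames ** 2).mean(axis=1))
    return float((rms > rms_threshold).sum() * frame) / sampling_rate

With dur_vad = voiced_duration(temp_audio, sampling_rate), duration - dur_vad measures the silent portion of the clip, and the existing one-second threshold behaves as intended.
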
@@ -98,52 +82,68 @@ def process_audio(audio: tuple, state: AppState):
     else:
         state.stream = np.concatenate((state.stream, audio[1]))
 
-    #
+    # Check for a pause in speech
     pause_detected = determine_pause(state.stream, state.sampling_rate, state)
     state.pause_detected = pause_detected
 
-    # If a pause is detected and the user has started talking, stop recording
     if state.pause_detected and state.started_talking:
-
+        # Transcribe the audio when a pause is detected
+        _, transcription, _ = transcribe_function(state.stream, (state.sampling_rate, state.stream))
+        print(f"Transcription: {transcription}")
+
+        # Retrieve hybrid response using Neo4j and other methods
+        response_text = retriever(transcription)
+        print(f"Response: {response_text}")
+
+        # Generate audio from the response text
+        audio_path = generate_audio_elevenlabs(response_text)
+
+        # Reset state for the next input
+        state.stream = None
+        state.started_talking = False
+        state.pause_detected = False
+
+        return audio_path, state
+
     return None, state
 
+# Function to process audio input and transcribe it
+def transcribe_function(stream, new_chunk):
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
+    if y is None or len(y) == 0:
+        return stream, "", None
 
+    y = y.astype(np.float32)
+    max_abs_y = np.max(np.abs(y))
+    if max_abs_y > 0:
+        y = y / max_abs_y
+
+    if stream is not None and len(stream) > 0:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+
+    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    full_text = result.get("text", "")
+
+    return stream, full_text, full_text
 
 # Function to generate a full-text search query for Neo4j
 def generate_full_text_query(input: str) -> str:
-    # Split the input into words, ignoring any empty strings
     words = [el for el in input.split() if el]
-
-    # Check if there are no words
     if not words:
         return ""  # Return an empty string or a default query if desired
-
-    # Create the full-text query with fuzziness (~2 for proximity search)
     full_text_query = ""
     for word in words[:-1]:
         full_text_query += f" {word}~2 AND"
     full_text_query += f" {words[-1]}~2"
     return full_text_query.strip()
 
-
-# Define the template for generating responses based on context
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational and straight-foreward way without any Greet.
-Context:
-{context}
-Question: {question}
-Answer concisely:"""
-
-# Create a prompt object using the template
-prompt = ChatPromptTemplate.from_template(template)
-
-# Function to generate a response using the prompt and the context
-def generate_response_with_prompt(context, question):
-    formatted_prompt = prompt.format(context=context, question=question)
-    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-    response = llm(formatted_prompt)
-    return response.content.strip()
-
 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
     XI_API_KEY = os.environ['ELEVENLABS_API']
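
Two observations on this hunk. First, process_audio hands transcribe_function the accumulated stream twice, once as stream and once inside new_chunk, and transcribe_function concatenates the two, so the clip is effectively transcribed twice over; passing None as the first argument (a hypothetical one-line change) would transcribe it once. Second, generate_full_text_query emits Lucene fuzzy syntax (~2 allows an edit distance of up to two) for Neo4j's full-text index; its output follows directly from the code above:

>>> generate_full_text_query("vegan restaurants downtown")
'vegan~2 AND restaurants~2 AND downtown~2'
>>> generate_full_text_query("")
''
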
@@ -170,15 +170,37 @@ def generate_audio_elevenlabs(text):
                 if chunk:
                     f.write(chunk)
             audio_path = f.name
-            return audio_path
+        return audio_path
     else:
         print(f"Error generating audio: {response.text}")
         return None
 
-# Define the
+# Define the template for generating responses based on context
+template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
+Ask your question directly, and I'll provide a precise and quick, short and crisp response in a conversational and straightforward way without any greeting.
+Context:
+{context}
+
+Question: {question}
+Answer concisely:"""
+
+# Create a prompt object using the template
+prompt = ChatPromptTemplate.from_template(template)
+
+# Function to generate a response using the prompt and the context
+def generate_response_with_prompt(context, question):
+    formatted_prompt = prompt.format(
+        context=context,
+        question=question
+    )
+    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
+    response = llm(formatted_prompt)
+    return response.content.strip()
+
+# Define the function to generate a hybrid response using Neo4j and other retrieval methods
 def retriever(question: str):
-    structured_query = """
-    CALL db.index.fulltext.queryNodes('entity', $query, {limit: 2})
+    structured_query = f"""
+    CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
     YIELD node, score
     RETURN node.id AS entity, node.text AS context, score
     ORDER BY score DESC
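
generate_response_with_prompt formats the ChatPromptTemplate down to a plain string and invokes the chat model with the bare llm(...) call, which recent LangChain releases deprecate (and which, depending on the version, may reject a plain string in place of a message list). A hedged alternative using prompt-to-model piping, assuming a LangChain version with LCEL support and the langchain-openai package; the answer_chain name is illustrative:

import os
from langchain_openai import ChatOpenAI  # assumption: the app may import this elsewhere

llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
answer_chain = prompt | llm  # prompt is the ChatPromptTemplate defined above

def generate_response_with_prompt(context, question):
    response = answer_chain.invoke({"context": context, "question": question})
    return response.content.strip()
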
@@ -191,27 +213,24 @@ def retriever(question: str):
     unstructured_response = "\n".join(unstructured_data)
 
     combined_context = f"Structured data:\n{structured_response}\n\nUnstructured data:\n{unstructured_response}"
-
-
-# Function to handle the entire audio query and response process
-def process_audio_query(state: AppState, audio_input):
-    state, _ = process_audio(audio_input, state)
-    if state.pause_detected:
-        # Perform transcription once pause is detected
-        transcription = pipe_asr({"array": state.stream, "sampling_rate": state.sampling_rate}, return_timestamps=False)["text"]
-        response_text = retriever(transcription)
-        audio_path = generate_audio_elevenlabs(response_text)
-        return audio_path, state
-    return None, state
+    final_response = generate_response_with_prompt(combined_context, question)
+    return final_response
 
 # Create Gradio interface for audio input and output
-
-
-
-
-
-
-
+interface = gr.Interface(
+    fn=lambda audio, state: process_audio(audio, state),
+    inputs=[
+        gr.Audio(sources="microphone", type="numpy", streaming=True),
+        gr.State(AppState())
+    ],
+    outputs=[
+        gr.Audio(type="filepath", autoplay=True, interactive=False),
+        gr.State()
+    ],
+    live=True,
+    description="Ask questions via audio and receive audio responses.",
+    allow_flagging="never"
+)
 
 # Launch the Gradio app
-interface.launch(
+interface.launch()
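
A final gap worth flagging: none of the displayed lines ever assigns state.sampling_rate, which defaults to 0; unless the single context line elided between the second and third hunks (old line 97) records it, the first determine_pause call divides by zero. Gradio streams every chunk as a (sampling_rate, samples) tuple, so the rate is available on the first callback. A minimal guard, as a sketch (the helper name is hypothetical):

def ensure_sampling_rate(audio: tuple, state: AppState) -> None:
    """Record the stream's sampling rate from the first chunk."""
    if state.sampling_rate == 0:
        state.sampling_rate = audio[0]  # Gradio chunks arrive as (sampling_rate, np.ndarray)

Calling ensure_sampling_rate(audio, state) at the top of process_audio would make the duration arithmetic in determine_pause well-defined. Separately, fn=lambda audio, state: process_audio(audio, state) in the interface is equivalent to the simpler fn=process_audio.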