Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,8 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
|
|
| 10 |
# Initialize the automatic-speech-recognition (ASR) pipeline once at module
# load so every speech_to_text call reuses the same loaded wav2vec2 model.
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
| 12 |
|
|
|
|
|
|
|
| 13 |
def speech_to_text(speech):
    """Transcribe an audio input to text.

    Args:
        speech: Audio input accepted by the module-level ASR pipeline
            (a filepath, per the Gradio microphone component elsewhere
            in this app).

    Returns:
        The transcribed text string.
    """
    transcription = asr(speech)
    return transcription["text"]
|
|
@@ -23,7 +25,6 @@ def classify_mood(input_string):
|
|
| 23 |
return word, True
|
| 24 |
return None, False
|
| 25 |
|
| 26 |
-
|
| 27 |
def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
|
| 28 |
temperature = float(temperature)
|
| 29 |
if temperature < 1e-2:
|
|
@@ -51,6 +52,7 @@ def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, r
|
|
| 51 |
playlist_message = f"Playing {mood.capitalize()} playlist for you!"
|
| 52 |
return playlist_message
|
| 53 |
return output
|
|
|
|
| 54 |
def format_prompt(message, history):
|
| 55 |
"""Formats the prompt including fixed instructions and conversation history."""
|
| 56 |
fixed_prompt = """
|
|
@@ -63,8 +65,6 @@ def format_prompt(message, history):
|
|
| 63 |
Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
|
| 64 |
|
| 65 |
Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
|
| 66 |
-
|
| 67 |
-
[Examples omitted for brevity]
|
| 68 |
"""
|
| 69 |
prompt = f"{fixed_prompt}\n"
|
| 70 |
for user_prompt, bot_response in history:
|
|
@@ -84,7 +84,7 @@ def process_input(input_text, history):
|
|
| 84 |
return history, history, "", None
|
| 85 |
response = generate(input_text, history)
|
| 86 |
history.append((input_text, response))
|
| 87 |
-
return history, history, "", None
|
| 88 |
|
| 89 |
async def generate_audio(history):
|
| 90 |
if history and len(history) > 0:
|
|
@@ -93,6 +93,11 @@ async def generate_audio(history):
|
|
| 93 |
return audio_path
|
| 94 |
return None
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
# Gradio interface setup
|
| 97 |
with gr.Blocks() as demo:
|
| 98 |
gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
|
|
@@ -107,6 +112,9 @@ with gr.Blocks() as demo:
|
|
| 107 |
submit = gr.Button("Send")
|
| 108 |
voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
|
| 109 |
|
|
|
|
|
|
|
|
|
|
| 110 |
# Handle text input
|
| 111 |
msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
|
| 112 |
generate_audio, inputs=[state], outputs=[audio_output]
|
|
|
|
| 10 |
# Initialize the ASR pipeline
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

# Greeting shown (and spoken) when the chat first loads; consumed by init_chat.
INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
|
| 14 |
+
|
| 15 |
def speech_to_text(speech):
    """Converts speech to text using the ASR pipeline.

    Args:
        speech: Audio input accepted by the module-level ``asr`` pipeline
            (a filepath, per the Gradio microphone component — TODO confirm).

    Returns:
        The transcribed text string.
    """
    return asr(speech)["text"]
|
|
|
|
| 25 |
return word, True
|
| 26 |
return None, False
|
| 27 |
|
|
|
|
| 28 |
def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
|
| 29 |
temperature = float(temperature)
|
| 30 |
if temperature < 1e-2:
|
|
|
|
| 52 |
playlist_message = f"Playing {mood.capitalize()} playlist for you!"
|
| 53 |
return playlist_message
|
| 54 |
return output
|
| 55 |
+
|
| 56 |
def format_prompt(message, history):
|
| 57 |
"""Formats the prompt including fixed instructions and conversation history."""
|
| 58 |
fixed_prompt = """
|
|
|
|
| 65 |
Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
|
| 66 |
|
| 67 |
Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
|
|
|
|
|
|
|
| 68 |
"""
|
| 69 |
prompt = f"{fixed_prompt}\n"
|
| 70 |
for user_prompt, bot_response in history:
|
|
|
|
| 84 |
return history, history, "", None
|
| 85 |
response = generate(input_text, history)
|
| 86 |
history.append((input_text, response))
|
| 87 |
+
return history, history, "", None
|
| 88 |
|
| 89 |
async def generate_audio(history):
|
| 90 |
if history and len(history) > 0:
|
|
|
|
| 93 |
return audio_path
|
| 94 |
return None
|
| 95 |
|
| 96 |
+
async def init_chat():
    """Seed a new conversation with the assistant's greeting.

    Builds a one-turn history containing INITIAL_MESSAGE (empty user side)
    and synthesizes the greeting audio via text_to_speech.

    Returns:
        A 3-tuple (state_history, chatbot_history, audio_path) — the same
        history list is returned twice to populate both the Gradio state
        and the chatbot component, plus the path to the greeting audio.
    """
    greeting_history = [("", INITIAL_MESSAGE)]
    greeting_audio = await text_to_speech(INITIAL_MESSAGE)
    return greeting_history, greeting_history, greeting_audio
|
| 100 |
+
|
| 101 |
# Gradio interface setup
|
| 102 |
with gr.Blocks() as demo:
|
| 103 |
gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
|
|
|
|
| 112 |
submit = gr.Button("Send")
|
| 113 |
voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
|
| 114 |
|
| 115 |
+
# Initialize chat with greeting
|
| 116 |
+
demo.load(init_chat, outputs=[state, chatbot, audio_output])
|
| 117 |
+
|
| 118 |
# Handle text input
|
| 119 |
msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
|
| 120 |
generate_audio, inputs=[state], outputs=[audio_output]
|