Mood_Based_Music_Recommender

Running

App Files Files Community

syedmudassir16 committed on Sep 24, 2024

Commit

06f1280

verified ·

1 Parent(s): 196e87a

Update app.py

Browse files

Files changed (1) hide show

app.py +227 -154

app.py CHANGED Viewed

@@ -1,57 +1,26 @@
-import os
-import gradio as gr
-import whisper
-from gtts import gTTS
-import io
-import logging
 from huggingface_hub import InferenceClient
-# Set up logging
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger(__name__)
-# Initialize the Hugging Face Inference Client
 client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
-# Load the Whisper model
-model = whisper.load_model("base")
-def format_prompt(message, history):
-    fixed_prompt = """
-    You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
-    Note: Do not write anything else other than the classified mood if classified.
-    Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
-    Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
-    Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
-    Examples:
-    User: I'm feeling so energetic today!
-    LLM Response: Happy
-    User: I'm feeling down today.
-    LLM Response: Sad
-    User: I need some background music while I am stuck in traffic.
-    LLM Response: Instrumental
-    User: Let's have a blast tonight!
-    LLM Response: Party
-    User: Hi
-    LLM Response: Hi, how are you doing?
-    User: I need a coffee
-    LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
-    """
-    prompt = f"<s>{fixed_prompt}"
-    for user_prompt, bot_response in history:
-        prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
-    prompt += f"\nUser: {message}\nLLM Response:"
-    return prompt
 def classify_mood(input_string):
     input_string = input_string.lower()
     mood_words = {"happy", "sad", "instrumental", "party"}
     for word in mood_words:
@@ -84,117 +53,221 @@ def generate(
     for response in stream:
         output += response.token.text
         mood, is_classified = classify_mood(output)
         if is_classified:
             playlist_message = f"Playing {mood.capitalize()} playlist for you!"
-            return playlist_message
     return output
-def process_audio(audio_file):
-    try:
-        logger.debug(f"Processing audio file: {audio_file}")
-        # Check if audio_file is None or empty
-        if audio_file is None or not os.path.exists(audio_file):
-            logger.warning("No audio input detected")
-            return "No audio input detected. Please try again.", "", None
-        # Load audio file
-        audio = whisper.load_audio(audio_file)
-        # Check if audio is empty
-        if len(audio) == 0:
-            logger.warning("Empty audio file detected")
-            return "The audio file appears to be empty. Please try again with a valid audio input.", "", None
-        # Transcribe the audio using Whisper
-        logger.debug("Transcribing audio")
-        result = model.transcribe(audio)
-        text = result["text"]
-        # Check if transcription is empty
-        if not text.strip():
-            logger.warning("No speech detected in the audio")
-            return "No speech detected in the audio. Please try speaking more clearly or check your microphone.", "", None
-        logger.debug(f"Transcribed text: {text}")
-        # Generate a response using the existing generate function
-        logger.debug("Generating response")
-        response = generate(text, [])
-        logger.debug(f"Generated response: {response}")
-        # Convert the response text to speech
-        logger.debug("Converting response to speech")
-        tts = gTTS(response)
-        response_audio_io = io.BytesIO()
-        tts.write_to_fp(response_audio_io)
-        response_audio_io.seek(0)
-        # Save audio to a file
-        response_audio_path = "response.mp3"
-        with open(response_audio_path, "wb") as audio_file:
-            audio_file.write(response_audio_io.getvalue())
-        logger.debug("Audio processing completed successfully")
-        return text, response, response_audio_path
-    except Exception as e:
-        logger.exception("An error occurred while processing audio")
-        return f"An error occurred: {str(e)}", "", None
-# Create the Gradio interface with customized UI
-with gr.Blocks(css="""
-    .gradio-container {
-        font-family: Arial, sans-serif;
-        background-color: #f0f4c3;
-        border-radius: 10px;
-        padding: 20px;
-        box-shadow: 0 4px 12px rgba(0,0,0,0.2);
-        text-align: center;
-    }
-    .gradio-input, .gradio-output {
-        border-radius: 6px;
-        border: 1px solid #ddd;
-        padding: 10px;
-    }
-    .gradio-button {
-        background-color: #ff7043;
-        color: white;
-        border-radius: 6px;
-        border: none;
-        padding: 10px 20px;
-        font-size: 16px;
-        cursor: pointer;
-    }
-    .gradio-button:hover {
-        background-color: #e64a19;
-    }
-    .gradio-title {
-        font-size: 28px;
-        font-weight: bold;
-        margin-bottom: 20px;
-        color: #37474f;
-    }
-    .gradio-description {
-        font-size: 16px;
-        margin-bottom: 20px;
-        color: #616161;
-    }
-""") as demo:
-    gr.Markdown("# Voice-Enabled Mood-Based Music Recommender")
-    gr.Markdown("Upload an audio file or use the microphone to interact with the mood-based music recommender. The system will transcribe your audio, analyze your mood, and provide a spoken recommendation.")
     with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(source="microphone", type="filepath", label="Upload Audio or Use Microphone")
-            submit_button = gr.Button("Submit")
-        with gr.Column():
-            transcription = gr.Textbox(label="Transcription", placeholder="Your speech will be transcribed here", lines=3)
-            response_text = gr.Textbox(label="Recommendation", placeholder="The mood-based recommendation will appear here", lines=3)
-            response_audio = gr.Audio(label="Audio Response", type="filepath")
-    submit_button.click(fn=process_audio, inputs=audio_input, outputs=[transcription, response_text, response_audio])
 if __name__ == "__main__":
-    demo.launch(share=True)

 from huggingface_hub import InferenceClient
+from transformers import pipeline
+import gradio as gr
+import edge_tts
+import tempfile
+import os
+from streaming_stt_nemo import Model
+import torch
+import random
+# Initialize the Hugging Face inference client for the Mistral instruct model
+# (no token is passed explicitly here).
 client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
+# Initialize the ASR pipeline (wav2vec2 checkpoint) that speech_to_text() calls.
+asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+def speech_to_text(speech):
+    """Converts speech to text using the ASR pipeline."""
+    # breakpoint()
+    return asr(speech)["text"]
 def classify_mood(input_string):
+    """Classifies the mood based on keywords in the input string."""
     input_string = input_string.lower()
     mood_words = {"happy", "sad", "instrumental", "party"}
     for word in mood_words:
     for response in stream:
         output += response.token.text
         mood, is_classified = classify_mood(output)
+        # Print the chatbot's response
         if is_classified:
+            print("Chatbot:", mood.capitalize())
             playlist_message = f"Playing {mood.capitalize()} playlist for you!"
+            output=playlist_message
+            return output
+        # yield output
     return output
+def format_prompt(message, history):
+    """Formats the prompt including fixed instructions and conversation history."""
+    fixed_prompt = """
+            You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
+            Note: Do not write anything else other than the classified mood if classified.
+            Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
+            Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
+            Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
+            Examples
+            User: What is C programming?
+            LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?
+            User: Can I get a coffee?
+            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
+            User: I feel like rocking
+            LLM Response: Party
+            User: I'm feeling so energetic today!
+            LLM Response: Happy
+            User: I'm feeling down today.
+            LLM Response: Sad
+            User: I'm ready to have some fun tonight!
+            LLM Response: Party
+            User: I need some background music while I am stuck in traffic.
+            LLM Response: Instrumental
+            User: Hi
+            LLM Response: Hi, how are you doing?
+            User: Feeling okay only.
+            LLM Response: Are you having a good day?
+            User: I don't know
+            LLM Response: Do you want to listen to some relaxing music?
+            User: No
+            LLM Response: How about listening to some rock and roll music?
+            User: Yes
+            LLM Response: Party
+            User: Where do I find an encyclopedia?
+            LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
+            User: I need a coffee
+            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
+            User: I just got promoted at work!
+            LLM Response: Happy
+            User: Today is my birthday!
+            LLM Response: Happy
+            User: I won a prize in the lottery.
+            LLM Response: Happy
+            User: I am so excited about my vacation next week!
+            LLM Response: Happy
+            User: I aced my exams!
+            LLM Response: Happy
+            User: I had a wonderful time with my family today.
+            LLM Response: Happy
+            User: I just finished a great workout!
+            LLM Response: Happy
+            User: I am feeling really good about myself today.
+            LLM Response: Happy
+            User: I finally finished my project and it was a success!
+            LLM Response: Happy
+            User: I just heard my favorite song on the radio.
+            LLM Response: Happy
+            User: My pet passed away yesterday.
+            LLM Response: Sad
+            User: I lost my job today.
+            LLM Response: Sad
+            User: I'm feeling really lonely.
+            LLM Response: Sad
+            User: I didn't get the results I wanted.
+            LLM Response: Sad
+            User: I had a fight with my best friend.
+            LLM Response: Sad
+            User: I'm feeling really overwhelmed with everything.
+            LLM Response: Sad
+            User: I just got some bad news.
+            LLM Response: Sad
+            User: I'm missing my family.
+            LLM Response: Sad
+            User: I am feeling really down today.
+            LLM Response: Sad
+            User: Nothing seems to be going right.
+            LLM Response: Sad
+            User: I need some music while I study.
+            LLM Response: Instrumental
+            User: I want to listen to something soothing while I work.
+            LLM Response: Instrumental
+            User: Do you have any recommendations for background music?
+            LLM Response: Instrumental
+            User: I'm looking for some relaxing tunes.
+            LLM Response: Instrumental
+            User: I need some music to focus on my tasks.
+            LLM Response: Instrumental
+            User: Can you suggest some ambient music for meditation?
+            LLM Response: Instrumental
+            User: What's good for background music during reading?
+            LLM Response: Instrumental
+            User: I need some calm music to help me sleep.
+            LLM Response: Instrumental
+            User: I prefer instrumental music while cooking.
+            LLM Response: Instrumental
+            User: What's the best music to play while doing yoga?
+            LLM Response: Instrumental
+            User: Let's have a blast tonight!
+            LLM Response: Party
+            User: I'm in the mood to dance!
+            LLM Response: Party
+            User: I want to celebrate all night long!
+            LLM Response: Party
+            User: Time to hit the club!
+            LLM Response: Party
+            User: I feel like partying till dawn.
+            LLM Response: Party
+            User: Let's get this party started!
+            LLM Response: Party
+            User: I'm ready to party hard tonight.
+            LLM Response: Party
+            User: I'm in the mood for some loud music and dancing!
+            LLM Response: Party
+            User: Tonight's going to be epic!
+            LLM Response: Party
+            User: Lets turn up the music and have some fun!
+            LLM Response: Party
+"""  # Include your fixed prompt and instructions here
+    prompt = f"{fixed_prompt}"
+    for user_prompt, bot_response in history:
+        prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
+    prompt += f"\nUser: {message}\nLLM Response:"
+    return prompt
+async def process_speech(speech_file):
+        """Processes speech input to text and then calls generate."""
+        text = speech_to_text(speech_file)
+        reply = generate(text, history="")
+        communicate = edge_tts.Communicate(reply)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+            tmp_path = tmp_file.name
+            await communicate.save(tmp_path)
+        yield tmp_path
+DESCRIPTION = """ # <center><b>Mood-Based Music Recommender⚡</b></center>
+        ### <center>Hi! I'm a music recommender app.
+        ### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
+        """
+# Gradio interface setup
+with gr.Blocks(css="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
     with gr.Row():
+        input = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
+        output = gr.Audio(label="AI", type="filepath",
+                        interactive=False,
+                        autoplay=True,
+                        elem_classes="audio")
+        gr.Interface(
+            batch=True,
+            max_batch_size=10,
+            fn=process_speech,
+            inputs=[input],
+            outputs=[output], live=True)
 if __name__ == "__main__":
+    demo.queue(max_size=200).launch()