Mood_Based_Music_Recommender

Running

App Files Files Community

syedmudassir16 committed on Sep 24, 2024

Commit

6651f83

verified ·

1 Parent(s): daf54ea

Update app.py

Browse files

Files changed (1) hide show

app.py +433 -68

app.py CHANGED Viewed

@@ -1,91 +1,217 @@
-import gradio as gr
 from huggingface_hub import InferenceClient
 import os
-# Initialize the Inference Client
 client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
-# Ensure you have set the HUGGINGFACE_TOKEN environment variable in your Hugging Face Space
-HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
-if HF_TOKEN is None:
-    raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable in your Hugging Face Space.")
 def format_prompt(message, history):
     fixed_prompt = """
-    [INST] You are a smart mood analyzer, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
-    Note: Do not write anything else other than the classified mood if classified.
-    Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
-    Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
-    Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
-    Examples
-    User: What is C programming?
-    Assistant: C programming is a programming language. How are you feeling now after knowing the answer?
-    User: Can I get a coffee?
-    Assistant: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
-    User: I feel like rocking
-    Assistant: Party
-    User: I'm feeling so energetic today!
-    Assistant: Happy
-    User: I'm feeling down today.
-    Assistant: Sad
-    User: I'm ready to have some fun tonight!
-    Assistant: Party
-    User: I need some background music while I am stuck in traffic.
-    Assistant: Instrumental
-    User: Hi
-    Assistant: Hi, how are you doing?
-    User: Feeling okay only.
-    Assistant: Are you having a good day?
-    User: I don't know
-    Assistant: Do you want to listen to some relaxing music?
-    User: No
-    Assistant: How about listening to some rock and roll music?
-    User: Yes
-    Assistant: Party
-    User: Where do I find an encyclopedia?
-    Assistant: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
-    User: I need a coffee
-    Assistant: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
-    Now, please analyze the user's mood based on their input.
-    [/INST]
-    """
     prompt = f"{fixed_prompt}"
     for user_prompt, bot_response in history:
-        prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
     prompt += f"\nUser: {message}\nLLM Response:"
     return prompt
-def classify_mood(input_string):
-    input_string = input_string.lower()
-    mood_words = {"happy", "sad", "instrumental", "party"}
-    for word in mood_words:
-        if word in input_string:
-            return word, True
-    return None, False
-def generate(
-    prompt, history, temperature=0.7, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
-):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
@@ -93,10 +219,11 @@ def generate(
     generate_kwargs = dict(
         temperature=temperature,
-        max_new_tokens=max_new_tokens,
         top_p=top_p,
         repetition_penalty=repetition_penalty,
         do_sample=True,
     )
     formatted_prompt = format_prompt(prompt, history)
@@ -111,15 +238,253 @@ def generate(
             playlist_message = f"Playing {mood.capitalize()} playlist for you!"
             return playlist_message
     return output
-def chat(message, history):
-    response = generate(message, history)
-    return response
-demo = gr.ChatInterface(
-    fn=chat,
-    title="Mood-Based Music Recommender",
-    description="<span style='font-size: larger; font-weight: bold;'>Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!</span>"
-)
-demo.launch()

 from huggingface_hub import InferenceClient
+from transformers import pipeline
+import gradio as gr
+import edge_tts
+import tempfile
 import os
+import wave
+import io
+import asyncio
+import emoji
# Create the inference client for the Mistral instruct model.
# NOTE(review): no token is passed explicitly here; presumably credentials come
# from the environment / Space configuration — confirm.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
# Initialize the ASR pipeline (used by speech_to_text to transcribe recordings)
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
# Markdown/HTML header rendered at the top of the Gradio interface.
# NOTE(review): the <center> tag on the second line of the text is never
# closed — confirm it renders as intended.
DESCRIPTION = """# <center><b>Mood-Based Music Recommender⚡</b></center>
### <center>Hi! I'm a music recommender app.
### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
"""
def speech_to_text(speech_path):
    """Transcribe the audio at ``speech_path`` and return the recognized text.

    The module-level ``asr`` pipeline does the work; only the ``"text"``
    entry of its result is handed back to the caller.
    """
    transcription = asr(speech_path)
    return transcription["text"]
def classify_mood(input_string):
    """Find the first supported mood keyword mentioned in ``input_string``.

    Matching is case-insensitive and substring-based. When several keywords
    are present, the one occurring earliest in the text wins, so the result
    is deterministic instead of depending on set iteration order.

    Args:
        input_string: Text to scan. ``None`` or an empty string is treated
            as containing no mood.

    Returns:
        ``(mood, True)`` with the lowercase keyword when one is found,
        otherwise ``(None, False)``.
    """
    if not input_string:
        return None, False
    text = input_string.lower()
    mood_words = ("happy", "sad", "instrumental", "party")
    # Position of the first occurrence of each keyword; -1 means "absent".
    positions = {word: text.find(word) for word in mood_words}
    found = [word for word in mood_words if positions[word] >= 0]
    if not found:
        return None, False
    return min(found, key=positions.__getitem__), True
 def format_prompt(message, history):
+    """Formats the prompt including fixed instructions and conversation history."""
     fixed_prompt = """
+            You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
+            Note: Do not write anything else other than the classified mood if classified.
+            Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
+            Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
+            Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
+            Examples
+            User: What is C programming?
+            LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?
+            User: Can I get a coffee?
+            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
+            User: I feel like rocking
+            LLM Response: Party
+            User: I'm feeling so energetic today!
+            LLM Response: Happy
+            User: I'm feeling down today.
+            LLM Response: Sad
+            User: I'm ready to have some fun tonight!
+            LLM Response: Party
+            User: I need some background music while I am stuck in traffic.
+            LLM Response: Instrumental
+            User: Hi
+            LLM Response: Hi, how are you doing?
+            User: Feeling okay only.
+            LLM Response: Are you having a good day?
+            User: I don't know
+            LLM Response: Do you want to listen to some relaxing music?
+            User: No
+            LLM Response: How about listening to some rock and roll music?
+            User: Yes
+            LLM Response: Party
+            User: Where do I find an encyclopedia?
+            LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
+            User: I need a coffee
+            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
+            User: I just got promoted at work!
+            LLM Response: Happy
+            User: Today is my birthday!
+            LLM Response: Happy
+            User: I won a prize in the lottery.
+            LLM Response: Happy
+            User: I am so excited about my vacation next week!
+            LLM Response: Happy
+            User: I aced my exams!
+            LLM Response: Happy
+            User: I had a wonderful time with my family today.
+            LLM Response: Happy
+            User: I just finished a great workout!
+            LLM Response: Happy
+            User: I am feeling really good about myself today.
+            LLM Response: Happy
+            User: I finally finished my project and it was a success!
+            LLM Response: Happy
+            User: I just heard my favorite song on the radio.
+            LLM Response: Happy
+            User: My pet passed away yesterday.
+            LLM Response: Sad
+            User: I lost my job today.
+            LLM Response: Sad
+            User: I'm feeling really lonely.
+            LLM Response: Sad
+            User: I didn't get the results I wanted.
+            LLM Response: Sad
+            User: I had a fight with my best friend.
+            LLM Response: Sad
+            User: I'm feeling really overwhelmed with everything.
+            LLM Response: Sad
+            User: I just got some bad news.
+            LLM Response: Sad
+            User: I'm missing my family.
+            LLM Response: Sad
+            User: I am feeling really down today.
+            LLM Response: Sad
+            User: Nothing seems to be going right.
+            LLM Response: Sad
+            User: I need some music while I study.
+            LLM Response: Instrumental
+            User: I want to listen to something soothing while I work.
+            LLM Response: Instrumental
+            User: Do you have any recommendations for background music?
+            LLM Response: Instrumental
+            User: I'm looking for some relaxing tunes.
+            LLM Response: Instrumental
+            User: I need some music to focus on my tasks.
+            LLM Response: Instrumental
+            User: Can you suggest some ambient music for meditation?
+            LLM Response: Instrumental
+            User: What's good for background music during reading?
+            LLM Response: Instrumental
+            User: I need some calm music to help me sleep.
+            LLM Response: Instrumental
+            User: I prefer instrumental music while cooking.
+            LLM Response: Instrumental
+            User: What's the best music to play while doing yoga?
+            LLM Response: Instrumental
+            User: Let's have a blast tonight!
+            LLM Response: Party
+            User: I'm in the mood to dance!
+            LLM Response: Party
+            User: I want to celebrate all night long!
+            LLM Response: Party
+            User: Time to hit the club!
+            LLM Response: Party
+            User: I feel like partying till dawn.
+            LLM Response: Party
+            User: Let's get this party started!
+            LLM Response: Party
+            User: I'm ready to party hard tonight.
+            LLM Response: Party
+            User: I'm in the mood for some loud music and dancing!
+            LLM Response: Party
+            User: Tonight's going to be epic!
+            LLM Response: Party
+            User: Lets turn up the music and have some fun!
+            LLM Response: Party
+"""  # Include your fixed prompt and instructions here
     prompt = f"{fixed_prompt}"
     for user_prompt, bot_response in history:
+        prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
     prompt += f"\nUser: {message}\nLLM Response:"
     return prompt
+def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
     generate_kwargs = dict(
         temperature=temperature,
+        max_new_tokens=2048,
         top_p=top_p,
         repetition_penalty=repetition_penalty,
         do_sample=True,
+        seed=42,
     )
     formatted_prompt = format_prompt(prompt, history)
             playlist_message = f"Playing {mood.capitalize()} playlist for you!"
             return playlist_message
     return output
def generate_llm_output(
        prompt,
        history,
        llm,
        temperature=0.8,
        max_tokens=256,
        top_p=0.95,
        stop_words=("<s>", "[/INST]", "</s>"),
    ):
    """Stream a completion from ``llm``, yielding the accumulated text so far.

    This is a generator: after every streamed piece it yields the whole reply
    built up to that point. Streaming ends early when a piece equals one of
    ``stop_words`` or is an emoji.

    Args:
        prompt: Latest user message.
        history: Earlier ``(user, bot)`` turns, passed to ``format_prompt``.
        llm: Callable invoked as ``llm(text, stream=True, **kwargs)`` whose
            streamed items expose ``item["choices"][0]["text"]``.
        temperature: Sampling temperature; values below ``1e-2`` are raised
            to ``1e-2``.
        max_tokens: Forwarded to ``llm`` as ``max_tokens``.
        top_p: Nucleus-sampling value, coerced to ``float``.
        stop_words: Stop sequences, forwarded to ``llm`` and also checked
            against each streamed piece.

    Yields:
        The reply text accumulated so far. If ``llm`` raises, a fixed apology
        message is yielded so the caller still receives something to show.
    """
    temperature = max(float(temperature), 1e-2)
    generate_kwargs = dict(
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=float(top_p),
        stop=list(stop_words),
    )
    formatted_prompt = format_prompt(prompt, history)
    output = ""
    try:
        print("LLM Input:", formatted_prompt)
        # Local GGUF
        stream = llm(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
        )
        for response in stream:
            piece = response["choices"][0]["text"]
            if piece in stop_words:
                # end of context
                return
            if emoji.is_emoji(piece):
                # An emoji derails the following lines of the chat; stop here.
                return
            output += piece
            yield output
    except Exception as e:
        print("Unhandled Exception: ", str(e))
        gr.Warning("Unfortunately Mistral is unable to process")
        output = "I do not know what happened but I could not understand you ."
        # A generator's `return value` is invisible to a `for` loop, so the
        # fallback message has to be yielded to reach the caller.
        yield output
def get_sentence(history, client):
    """Stream the reply for the last history entry, one sentence at a time.

    Feeds the last user message (``history[-1][0]``) and the preceding turns
    to ``generate_llm_output``, stores the growing reply in
    ``history[-1][1]``, and yields ``(sentence, history)`` each time a new
    sentence is detected. After the stream ends, the final sentence is
    yielded if it has not been emitted yet.

    NOTE(review): ``nltk`` is used below but is not among the imports visible
    in this file — confirm it is imported somewhere.
    NOTE(review): ``history[-1][1] = ...`` requires the last entry to be a
    mutable sequence (e.g. a list) — confirm what callers pass.
    """
    # Fall back to a single empty turn when no history is supplied.
    history = [["", None]] if history is None else history
    # The reply slot of the last turn is filled in as the stream progresses.
    history[-1][1] = ""
    sentence_list = []          # sentences already yielded
    sentence_hash_list = []     # hashes of the yielded sentences, for de-duplication
    text_to_generate = ""
    stored_sentence = None      # short sentence held back to be merged with the next one
    stored_sentence_hash = None
    for character in generate_llm_output(history[-1][0], history[:-1], client):
        # Each item is the reply accumulated so far, not just the newest piece.
        history[-1][1] = character.replace("<|assistant|>","")
        # It is coming word by word
        # NOTE(review): the final .replace("","") is a no-op; presumably a
        # marker string was lost from this line — confirm.
        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
        # Only act once at least two sentences have been tokenized.
        if len(text_to_generate) > 1:
            # Number of tokenized sentences that have not been yielded yet.
            dif = len(text_to_generate) - len(sentence_list)
            if dif == 1 and len(sentence_list) != 0:
                continue
            if dif == 2 and len(sentence_list) != 0 and stored_sentence is not None:
                continue
            # All this complexity due to trying append first short sentence to next one for proper language auto-detect
            if stored_sentence is not None and stored_sentence_hash is None and dif>1:
                #means we consumed stored sentence and should look at next sentence to generate
                sentence = text_to_generate[len(sentence_list)+1]
            elif stored_sentence is not None and len(text_to_generate)>2 and stored_sentence_hash is not None:
                print("Appending stored")
                sentence = stored_sentence + text_to_generate[len(sentence_list)+1]
                stored_sentence_hash = None
            else:
                sentence = text_to_generate[len(sentence_list)]
            # too short sentence just append to next one if there is any
            # this is for proper language detection
            if len(sentence)<=15 and stored_sentence_hash is None and stored_sentence is None:
                if sentence[-1] in [".","!","?"]:
                    if stored_sentence_hash != hash(sentence):
                        stored_sentence = sentence
                        stored_sentence_hash = hash(sentence)
                        print("Storing:",stored_sentence)
                        continue
            sentence_hash = hash(sentence)
            # Skip the sentence that is currently being held back.
            if stored_sentence_hash is not None and sentence_hash == stored_sentence_hash:
                continue
            # Yield each distinct sentence only once.
            if sentence_hash not in sentence_hash_list:
                sentence_hash_list.append(sentence_hash)
                sentence_list.append(sentence)
                print("New Sentence: ", sentence)
                yield (sentence, history)
    # return that final sentence token
    try:
        # [-1] raises IndexError when the reply is empty; the handler below catches it.
        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
        sentence_hash = hash(last_sentence)
        if sentence_hash not in sentence_hash_list:
            # Prepend a short sentence that is still being held back.
            if stored_sentence is not None and stored_sentence_hash is not None:
                last_sentence = stored_sentence + last_sentence
                stored_sentence = stored_sentence_hash = None
                print("Last Sentence with stored:",last_sentence)
            sentence_hash_list.append(sentence_hash)
            sentence_list.append(last_sentence)
            print("Last Sentence: ", last_sentence)
            yield (last_sentence, history)
    # NOTE(review): bare except hides every failure here, including a missing
    # nltk name — consider narrowing.
    except:
        print("ERROR on last sentence history is :", history)
def wave_header_chunk(frame_input=b"", channels=1, sample_width=2, sample_rate=24000):
    """Return ``frame_input`` wrapped in a WAV container, as bytes.

    With the default empty ``frame_input`` the result is just the WAV header
    for the given channel count, sample width and sample rate.
    """
    buffer = io.BytesIO()
    writer = wave.open(buffer, "wb")
    try:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(sample_rate)
        writer.writeframes(frame_input)
    finally:
        # Closing finalizes the header sizes in the buffer.
        writer.close()
    return buffer.getvalue()
async def process_speech(speech_file_path):
    """Transcribe a recording, generate a reply, and synthesize it to audio.

    Args:
        speech_file_path: Path of the audio file to transcribe.

    Returns:
        Path of a temporary file holding the synthesized reply. The file is
        created with ``delete=False``; the caller is responsible for removing
        it.
    """
    text = speech_to_text(speech_file_path)
    # No earlier turns: pass an empty list rather than an empty string.
    reply = generate(text, history=[])
    communicate = edge_tts.Communicate(reply)
    # Reserve a unique path and close the handle before edge-tts writes to
    # it; writing to a path that is still open elsewhere fails on platforms
    # that lock open files.
    # NOTE(review): the file gets a ".wav" suffix — confirm that matches the
    # audio format edge-tts actually writes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path
async def handle_speech_generation(speech_file_path, chatbot_history, chatbot_voice):
    """Produce the output triple for one speech-generation step.

    Args:
        speech_file_path: Input handed to ``process_speech``. An empty string
            or ``None`` means there is nothing to process.
        chatbot_history: Chat history, returned unchanged.
        chatbot_voice: Selected voice; currently not used by this function.

    Returns:
        ``(speech_file_path, chatbot_history, audio_path)`` when there is
        input to process, otherwise ``("", chatbot_history, None)``.
    """
    # Treat None like the empty string so a missing input is not forwarded
    # to the speech pipeline.
    if not speech_file_path:
        return "", chatbot_history, None
    speech_path = await process_speech(speech_file_path)
    return speech_file_path, chatbot_history, speech_path
async def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
    """Asynchronously yield ``(sentence, history, audio)`` output triples.

    The first item always carries an empty sentence and a bare WAV header
    from ``wave_header_chunk``. After that:

    * with ``initial_greeting`` set, the bot side of every existing history
      entry is passed through ``handle_speech_generation``;
    * otherwise each sentence produced by ``get_sentence`` is passed through
      ``handle_speech_generation`` as it becomes available.

    Args:
        chatbot_history: Chat history as ``(user, bot)`` pairs; may be
            ``None`` when no conversation exists yet.
        chatbot_voice: Selected voice, forwarded to
            ``handle_speech_generation``.
        initial_greeting: Whether to voice the existing history instead of
            generating a new reply.
    """
    yield "", chatbot_history, wave_header_chunk()
    if initial_greeting:
        # The history can be None before any message was sent; iterate an
        # empty list in that case instead of raising TypeError.
        for _, sentence in chatbot_history or []:
            result = await handle_speech_generation(sentence, chatbot_history, chatbot_voice)
            if result:
                yield result
    else:
        # get_sentence hands back the updated history with each sentence.
        for sentence, chatbot_history in get_sentence(chatbot_history, client):
            result = await handle_speech_generation(sentence, chatbot_history, chatbot_voice)
            if result:
                yield result
def wrap_async_generator(coro, *args):
    """Run the async generator ``coro(*args)`` to completion, synchronously.

    A fresh event loop is started via ``asyncio.run``; every item the
    generator yields is collected, in order, and returned as a list.
    """
    async def _drain():
        return [item async for item in coro(*args)]

    return asyncio.run(_drain())
# Gradio interface setup: chat window, voice selector, text/microphone inputs,
# a streaming audio player, and the event wiring between them.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.Chatbot(
        # value=[(None, "Hi friend, I'm Amy, an AI coach. How can I help you today?")],  # Initial greeting from the chatbot
        elem_id="chatbot",
        avatar_images=("examples/hf-logo.png", "examples/ai-chat-logo.png"),
        bubble_full_width=False,
    )
    VOICES = ["female", "male"]
    with gr.Row():
        # Voice selector; its value is passed to generate_speech as chatbot_voice.
        chatbot_voice = gr.Dropdown(
            label="Voice of the Chatbot",
            info="How should Chatbot talk like",
            choices=VOICES,
            multiselect=False,
            value=VOICES[0],
        )
    with gr.Row():
        # Text input and microphone recorder share one row.
        txt_box = gr.Textbox(
            scale=3,
            show_label=False,
            placeholder="Enter text and press enter, or speak to your microphone",
            container=False,
            interactive=True,
        )
        audio_record = gr.Audio(sources="microphone", type="filepath", scale=4)
    with gr.Row():
        # Hidden textbox that receives the first output of generate_speech.
        sentence = gr.Textbox(visible=False)
        audio_playback = gr.Audio(
            value=None,
            label="Generated audio response",
            streaming=True,
            autoplay=True,
            interactive=False,
            show_label=True,
        )
    def add_text(chatbot_history, text):
        """Append the typed message as a new unanswered turn and lock the textbox."""
        chatbot_history = [] if chatbot_history is None else chatbot_history
        chatbot_history = chatbot_history + [(text, None)]
        return chatbot_history, gr.update(value="", interactive=False)
    async def add_audio(chatbot_history, audio_path):
        """Transcribe the recording, append it as a new turn, and lock the textbox."""
        chatbot_history = [] if chatbot_history is None else chatbot_history
        response = speech_to_text(audio_path)
        text = response.strip()
        chatbot_history = chatbot_history + [(text, None)]
        return chatbot_history, gr.update(value="", interactive=False)
    # Text flow: add the message, then run generate_speech, then re-enable the textbox.
    # NOTE(review): wrap_async_generator returns a list of every yielded tuple,
    # while three output components are declared — confirm Gradio receives the
    # shape it expects.
    txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
                             ).then(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice], outputs=[sentence, chatbot, audio_playback])
    txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
    # Audio flow: same chain, triggered when a recording stops; afterwards the
    # textbox is re-enabled and the recorder is cleared.
    audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
                                            ).then(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice], outputs=[sentence, chatbot, audio_playback])
    audio_msg.then(fn=lambda: (gr.update(interactive=True), gr.update(interactive=True, value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
    # Markdown footer listing the models the Space relies on.
    FOOTNOTE = """
            This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
            It relies on the following models :
            - Speech to Text Model: [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) an ASR model, to transcribe recorded audio to text.
            - Large Language Model: [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) a LLM to generate the chatbot responses.
            - Text to Speech Model: [edge-tts](https://pypi.org/project/edge-tts/) a TTS model, to generate the voice of the chatbot.
            Note:
            - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
            - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
    gr.Markdown(FOOTNOTE)
    # On page load, run generate_speech with initial_greeting=True (third input).
    demo.load(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot, audio_playback])
# Enable request queuing and start the app with debug output and a public share link.
demo.queue().launch(debug=True, share=True)