Mood_Based_Music_Recommender

Sleeping

App Files Files Community

arjunanand13 committed on Jul 4

Commit

36147d9

verified ·

1 Parent(s): e64fc8c

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -206

app.py CHANGED Viewed

@@ -1,19 +1,15 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 from transformers import pipeline
 import edge_tts
 import tempfile
 import asyncio
 import os
 hf_token = os.getenv("HF_TOKEN")
-if hf_token:
-    client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1", token=hf_token)
-else:
-    print("Warning: No HF_TOKEN found. Please set your Hugging Face token.")
-    client = None
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
@@ -31,37 +27,40 @@ def classify_mood(input_string):
             return word, True
     return None, False
-def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
-    if client is None:
         return "Error: Hugging Face authentication required. Please set your HF_TOKEN."
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
     formatted_prompt = format_prompt(prompt, history)
     try:
-        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-        output = ""
-        for response in stream:
-            output += response.token.text
             mood, is_classified = classify_mood(output)
             if is_classified:
                 playlist_message = f"Playing {mood.capitalize()} playlist for you!"
                 return playlist_message
-        return output
     except Exception as e:
         return f"Error generating response: {str(e)}"
@@ -107,11 +106,6 @@ def process_input(input_text, history):
     history.append((input_text, response))
     return history, history, ""
-def handle_voice_input(audio_file):
-    if audio_file is None:
-        return ""
-    return speech_to_text(audio_file)
 async def generate_audio(history):
     if history and len(history) > 0:
         last_response = history[-1][1]
@@ -124,206 +118,73 @@ async def init_chat():
     audio_path = await text_to_speech(INITIAL_MESSAGE)
     return history, history, audio_path
 with gr.Blocks() as demo:
-    gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
     chatbot = gr.Chatbot()
-    msg = gr.Textbox(placeholder="Type your message here or use the microphone to speak...")
     audio_output = gr.Audio(label="AI Response", autoplay=True)
     state = gr.State([])
-    with gr.Row():
-        submit = gr.Button("Send")
-        voice_input = gr.Audio(
-            sources=["microphone"],
-            type="filepath",
-            label="Voice Input"
-        )
     demo.load(init_chat, outputs=[state, chatbot, audio_output])
     msg.submit(
-        process_input,
-        inputs=[msg, state],
         outputs=[state, chatbot, msg]
     ).then(
-        generate_audio,
-        inputs=[state],
         outputs=[audio_output]
     )
     submit.click(
-        process_input,
-        inputs=[msg, state],
         outputs=[state, chatbot, msg]
     ).then(
-        generate_audio,
-        inputs=[state],
         outputs=[audio_output]
     )
     voice_input.upload(
-        handle_voice_input,
         inputs=[voice_input],
         outputs=[msg]
     ).then(
-        process_input,
-        inputs=[msg, state],
         outputs=[state, chatbot, msg]
     ).then(
-        generate_audio,
-        inputs=[state],
         outputs=[audio_output]
     )
 if __name__ == "__main__":
-    demo.launch(share=True)
-# import gradio as gr
-# from huggingface_hub import InferenceClient
-# from transformers import pipeline
-# import edge_tts
-# import tempfile
-# import asyncio
-# # Initialize the inference client with your Hugging Face token
-# client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
-# # Initialize the ASR pipeline
-# asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
-# INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
-# def speech_to_text(speech):
-#     """Converts speech to text using the ASR pipeline."""
-#     return asr(speech)["text"]
-# def classify_mood(input_string):
-#     """Classifies the mood based on keywords in the input string."""
-#     input_string = input_string.lower()
-#     mood_words = {"happy", "sad", "instrumental", "party"}
-#     for word in mood_words:
-#         if word in input_string:
-#             return word, True
-#     return None, False
-# def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
-#     temperature = float(temperature)
-#     if temperature < 1e-2:
-#         temperature = 1e-2
-#     top_p = float(top_p)
-#     generate_kwargs = dict(
-#         temperature=temperature,
-#         max_new_tokens=max_new_tokens,
-#         top_p=top_p,
-#         repetition_penalty=repetition_penalty,
-#         do_sample=True,
-#         seed=42,
-#     )
-#     formatted_prompt = format_prompt(prompt, history)
-#     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-#     output = ""
-#     for response in stream:
-#         output += response.token.text
-#         mood, is_classified = classify_mood(output)
-#         if is_classified:
-#             playlist_message = f"Playing {mood.capitalize()} playlist for you!"
-#             return playlist_message
-#     return output
-# def format_prompt(message, history):
-#     """Formats the prompt including fixed instructions and conversation history."""
-#     fixed_prompt = """
-#     You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
-#     Instructions:
-#     1. Engage in a conversation with the user to understand their mood.
-#     2. Ask relevant questions to guide the conversation towards mood classification.
-#     3. If the user's mood is clear, respond with a single word: "Happy", "Sad", "Instrumental", or "Party".
-#     4. If the mood is unclear, continue the conversation with a follow-up question.
-#     5. Limit the conversation to a maximum of 5 exchanges.
-#     6. Do not classify the mood prematurely if it's not evident from the user's responses.
-#     7. Focus on the user's emotional state rather than specific activities or preferences.
-#     8. If unable to classify after 5 exchanges, respond with "Unclear" to indicate the need for more information.
-#     Remember: Your primary goal is mood classification. Stay on topic and guide the conversation towards understanding the user's emotional state.
-#     """
-#     prompt = f"{fixed_prompt}\n"
-#     # Add conversation history
-#     for i, (user_prompt, bot_response) in enumerate(history):
-#         prompt += f"User: {user_prompt}\nAssistant: {bot_response}\n"
-#         if i == 3:  # This is the 4th exchange (0-indexed)
-#             prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"
-#     prompt += f"User: {message}\nAssistant:"
-#     return prompt
-# async def text_to_speech(text):
-#     communicate = edge_tts.Communicate(text)
-#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-#         tmp_path = tmp_file.name
-#         await communicate.save(tmp_path)
-#     return tmp_path
-# def process_input(input_text, history):
-#     if not input_text:
-#         return history, history, "", None
-#     response = generate(input_text, history)
-#     history.append((input_text, response))
-#     return history, history, "", None
-# async def generate_audio(history):
-#     if history and len(history) > 0:
-#         last_response = history[-1][1]
-#         audio_path = await text_to_speech(last_response)
-#         return audio_path
-#     return None
-# async def init_chat():
-#     history = [("", INITIAL_MESSAGE)]
-#     audio_path = await text_to_speech(INITIAL_MESSAGE)
-#     return history, history, audio_path
-# # Gradio interface setup
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
-#     chatbot = gr.Chatbot()
-#     msg = gr.Textbox(placeholder="Type your message here or use the microphone to speak...")
-#     audio_output = gr.Audio(label="AI Response", autoplay=True)
-#     state = gr.State([])
-#     with gr.Row():
-#         submit = gr.Button("Send")
-#         voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
-#     # Initialize chat with greeting
-#     demo.load(init_chat, outputs=[state, chatbot, audio_output])
-#     # Handle text input
-#     msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
-#         generate_audio, inputs=[state], outputs=[audio_output]
-#     )
-#     submit.click(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
-#         generate_audio, inputs=[state], outputs=[audio_output]
-#     )
-#     # Handle voice input
-#     voice_input.stop_recording(
-#         lambda x: speech_to_text(x) if x else "",
-#         inputs=[voice_input],
-#         outputs=[msg]
-#     ).then(
-#         process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]
-#     ).then(
-#         generate_audio, inputs=[state], outputs=[audio_output]
-#     )
-# if __name__ == "__main__":
-#     demo.launch(share=True)

 import gradio as gr
+import requests
 from transformers import pipeline
 import edge_tts
 import tempfile
 import asyncio
 import os
+import json
+ENDPOINT_URL = "https://l8opkfvazwgxqljm.us-east-1.aws.endpoints.huggingface.cloud"
 hf_token = os.getenv("HF_TOKEN")
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
             return word, True
     return None, False
+def generate(prompt, history, temperature=0.1, max_new_tokens=2048):
+    if not hf_token:
         return "Error: Hugging Face authentication required. Please set your HF_TOKEN."
     formatted_prompt = format_prompt(prompt, history)
+    headers = {
+        "Authorization": f"Bearer {hf_token}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": "meta-llama/Llama-3.1-8B-Instruct",
+        "messages": [{"role": "user", "content": formatted_prompt}],
+        "temperature": temperature,
+        "max_tokens": max_new_tokens,
+        "stream": False
+    }
     try:
+        response = requests.post(f"{ENDPOINT_URL}/v1/chat/completions", headers=headers, json=payload)
+        if response.status_code == 200:
+            result = response.json()
+            output = result["choices"][0]["message"]["content"]
             mood, is_classified = classify_mood(output)
             if is_classified:
                 playlist_message = f"Playing {mood.capitalize()} playlist for you!"
                 return playlist_message
+            return output
+        else:
+            return f"Error: {response.status_code} - {response.text}"
     except Exception as e:
         return f"Error generating response: {str(e)}"
     history.append((input_text, response))
     return history, history, ""
 async def generate_audio(history):
     if history and len(history) > 0:
         last_response = history[-1][1]
     audio_path = await text_to_speech(INITIAL_MESSAGE)
     return history, history, audio_path
+def handle_voice_upload(audio_file):
+    if audio_file is None:
+        return ""
+    return speech_to_text(audio_file)
 with gr.Blocks() as demo:
+    gr.Markdown("# Mood-Based Music Recommender with Voice Chat")
     chatbot = gr.Chatbot()
+    with gr.Row():
+        msg = gr.Textbox(
+            placeholder="Type your message here...",
+            label="Text Input",
+            scale=4
+        )
+        submit = gr.Button("Send", scale=1)
+    with gr.Row():
+        voice_input = gr.File(
+            label="Upload Voice Recording (or record using your device)",
+            file_types=[".wav", ".mp3", ".m4a", ".ogg"]
+        )
     audio_output = gr.Audio(label="AI Response", autoplay=True)
     state = gr.State([])
     demo.load(init_chat, outputs=[state, chatbot, audio_output])
+    def submit_and_generate_audio(input_text, history):
+        new_state, new_chatbot, empty_msg = process_input(input_text, history)
+        return new_state, new_chatbot, empty_msg
     msg.submit(
+        submit_and_generate_audio,
+        inputs=[msg, state],
         outputs=[state, chatbot, msg]
     ).then(
+        generate_audio,
+        inputs=[state],
         outputs=[audio_output]
     )
     submit.click(
+        submit_and_generate_audio,
+        inputs=[msg, state],
         outputs=[state, chatbot, msg]
     ).then(
+        generate_audio,
+        inputs=[state],
         outputs=[audio_output]
     )
     voice_input.upload(
+        handle_voice_upload,
         inputs=[voice_input],
         outputs=[msg]
     ).then(
+        submit_and_generate_audio,
+        inputs=[msg, state],
         outputs=[state, chatbot, msg]
     ).then(
+        generate_audio,
+        inputs=[state],
         outputs=[audio_output]
     )
 if __name__ == "__main__":
+    demo.launch(share=True)