Spaces:

Fluospark128
/

Mind_Aid

Build error

App Files Files Community

Fluospark128 committed on May 2, 2025

Commit

665b958

verified ·

1 Parent(s): 4d5bf9e

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -25

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import requests
 import json
 import speech_recognition as sr
 from tempfile import NamedTemporaryFile
-import pyttsx3
 import logging
 import time
 from huggingface_hub import HfApi
@@ -15,7 +14,6 @@ logger = logging.getLogger(__name__)
 # Environment Variables
 HF_TOKEN = os.environ.get("HF_TOKEN")
-#HF_REPO_ID = os.environ.get("HF_REPO_ID")  # e.g., username/dataset
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
 GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
@@ -25,9 +23,6 @@ headers = {
     "Content-Type": "application/json"
 }
-# Hugging Face API Client
-hf_api = HfApi()
 # Emotion descriptions
 emotion_options = {
     "neutral": "Neutral or balanced mood",
@@ -44,6 +39,7 @@ emotion_options = {
 conversation_history = []
 # Transcribe audio
 def transcribe_audio(audio_path):
     recognizer = sr.Recognizer()
     try:
@@ -56,6 +52,7 @@ def transcribe_audio(audio_path):
         return ""
 # Generate Groq response
 def get_groq_response(prompt, history):
     messages = [{"role": "system", "content": prompt}]
     for msg in history:
@@ -76,31 +73,32 @@ def get_groq_response(prompt, history):
         logger.error(f"Groq API error: {e}")
         return "Error contacting AI."
-# Generate and upload TTS audio
 def generate_speech_and_upload(text):
     try:
-        tts = pyttsx3.init()
         temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
-        audio_path = temp_file.name
-        tts.save_to_file(text, audio_path)
-        tts.runAndWait()
-        time.sleep(1)  # Wait for file to be fully saved
-        hf_path = f"audio_responses/{os.path.basename(audio_path)}"
-        hf_api.upload_file(
-            path_or_fileobj=audio_path,
-            path_in_repo=hf_path,
-            repo_id=HF_REPO_ID,
-            repo_type="dataset",
-            token=HF_TOKEN
-        )
-        return audio_path
     except Exception as e:
-        logger.error(f"TTS or HF upload error: {e}")
         return None
 # Main handler
 def chat_with_ai(audio, text_input, emotion, history):
     global conversation_history
     user_text = text_input or ""
@@ -123,6 +121,7 @@ def chat_with_ai(audio, text_input, emotion, history):
     audio_path = generate_speech_and_upload(ai_response)
     return ai_response, audio_path, history + [[user_text, ai_response]]
 def clear_conversation():
     global conversation_history
     conversation_history = []
@@ -134,13 +133,15 @@ iface = gr.Blocks()
 with iface:
     gr.Markdown("# Mind AID AI Assistant")
     gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")
     with gr.Row():
         with gr.Column(scale=3):
             emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
             emotion_description = gr.Markdown("**Current mood:** Neutral")
             def update_emotion_desc(em):
                 return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"
             emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
         with gr.Column(scale=1):
             clear_btn = gr.Button("Clear Conversation")
@@ -173,4 +174,8 @@ with iface:
         outputs=[chat_history, audio_input, text_input, status]
     )
-iface.launch()

 import json
 import speech_recognition as sr
 from tempfile import NamedTemporaryFile
 import logging
 import time
 from huggingface_hub import HfApi
 # Environment Variables
 HF_TOKEN = os.environ.get("HF_TOKEN")
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
 GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
     "Content-Type": "application/json"
 }
 # Emotion descriptions
 emotion_options = {
     "neutral": "Neutral or balanced mood",
 conversation_history = []
 # Transcribe audio
 def transcribe_audio(audio_path):
     recognizer = sr.Recognizer()
     try:
         return ""
 # Generate Groq response
 def get_groq_response(prompt, history):
     messages = [{"role": "system", "content": prompt}]
     for msg in history:
         logger.error(f"Groq API error: {e}")
         return "Error contacting AI."
# Generate TTS using Yarngpt
def generate_speech_and_upload(text, timeout=60):
    """Synthesize speech for *text* through the Hugging Face Inference API.

    The text is POSTed to the hosted ``saheedniyi/Yarngpt`` model and the raw
    response body is written, unmodified, to a temporary file with a ``.wav``
    suffix. The function name is kept for existing callers; this version
    performs no upload.

    Args:
        text: The text to convert to speech.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The path of the temporary audio file, or ``None`` when *text* is
        empty, the API answers with a non-200 status, or any error occurs.
        The file is created with ``delete=False``; removing it is left to
        the caller.
    """
    try:
        # Nothing to synthesize: skip the network round-trip entirely.
        if not text or not text.strip():
            return None

        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        # Named distinctly so the module-level `headers` is not shadowed.
        auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}

        response = requests.post(
            inference_url,
            headers=auth_headers,
            json=payload,
            timeout=timeout,
        )
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None

        # Write through the handle NamedTemporaryFile already opened and let
        # the context manager close it, instead of leaking that handle and
        # reopening the file by name.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file.write(response.content)
        return temp_file.name
    except Exception as e:
        # Deliberate best-effort boundary: a TTS failure must not break the
        # chat reply, so every error is logged and reported as "no audio".
        logger.error(f"Hugging Face TTS error: {e}")
        return None
 # Main handler
 def chat_with_ai(audio, text_input, emotion, history):
     global conversation_history
     user_text = text_input or ""
     audio_path = generate_speech_and_upload(ai_response)
     return ai_response, audio_path, history + [[user_text, ai_response]]
 def clear_conversation():
     global conversation_history
     conversation_history = []
 with iface:
     gr.Markdown("# Mind AID AI Assistant")
     gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")
     with gr.Row():
         with gr.Column(scale=3):
             emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
             emotion_description = gr.Markdown("**Current mood:** Neutral")
             def update_emotion_desc(em):
                 return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"
             emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
         with gr.Column(scale=1):
             clear_btn = gr.Button("Clear Conversation")
         outputs=[chat_history, audio_input, text_input, status]
     )
+iface.launch()
+# Note: this is the complete revised code with Yarngpt integrated for text-to-speech output via Hugging Face. Make sure HF_TOKEN is set correctly in your environment and has access to the model saheedniyi/Yarngpt.