AIVoice3

Build error

App Files Files Community

dschandra committed on Jan 2, 2025

Commit

a843dd5

verified ·

1 Parent(s): 30e1c24

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -66

app.py CHANGED Viewed

@@ -1,17 +1,18 @@
 from flask import Flask, render_template_string, request, jsonify
-from datetime import datetime
-import speech_recognition as sr  # Import speech recognition
 from tempfile import NamedTemporaryFile
-import ffmpeg
 import os
 app = Flask(__name__)
-# Initialize an empty list to store orders
-orders = []
-user_preferences = {"diet": "all"}  # Default to all
-# HTML code for the frontend
 html_code = """
 <!DOCTYPE html>
 <html lang="en">
@@ -75,60 +76,69 @@ html_code = """
 <body>
     <h1>AI Dining Assistant</h1>
     <button class="mic-button" id="mic-button">🎤</button>
-    <div class="status" id="status">Press the mic button to start listening...</div>
     <div class="response" id="response" style="display: none;">Response will appear here...</div>
     <script>
         const micButton = document.getElementById('mic-button');
         const status = document.getElementById('status');
         const response = document.getElementById('response');
-        if (!window.MediaRecorder) {
-            alert("Your browser does not support audio recording.");
-        }
         let mediaRecorder;
         let audioChunks = [];
-        micButton.addEventListener('click', async () => {
-            navigator.mediaDevices.getUserMedia({ audio: true })
-                .then(stream => {
-                    mediaRecorder = new MediaRecorder(stream);
-                    mediaRecorder.start();
-                    status.textContent = 'Listening...';
-                    status.classList.add('listening');
-                    audioChunks = [];
-                    mediaRecorder.ondataavailable = event => {
-                        audioChunks.push(event.data);
-                    };
-                    mediaRecorder.onstop = async () => {
-                        const audioBlob = new Blob(audioChunks, { type: 'audio/wav; codecs=LINEAR16' });
-                        const formData = new FormData();
-                        formData.append('audio', audioBlob);
-                        status.textContent = 'Processing...';
-                        status.classList.remove('listening');
-                        try {
-                            const result = await fetch('/process-audio', {
-                                method: 'POST',
-                                body: formData,
-                            });
-                            const data = await result.json();
-                            response.textContent = data.response;
-                            response.style.display = 'block';
-                            status.textContent = 'Press the mic button to start listening...';
-                            // Use browser text-to-speech
-                            const utterance = new SpeechSynthesisUtterance(data.response);
-                            speechSynthesis.speak(utterance);
-                        } catch (error) {
-                            response.textContent = 'Error occurred. Please try again.';
-                            response.style.display = 'block';
-                            status.textContent = 'Press the mic button to start listening...';
-                        }
-                    };
-                    setTimeout(() => {
-                        mediaRecorder.stop();
-                    }, 5000); // Stop recording after 5 seconds
-                })
-                .catch(err => {
-                    status.textContent = 'Microphone access denied.';
-                });
         });
     </script>
 </body>
 </html>
@@ -141,37 +151,73 @@ def index():
 @app.route('/process-audio', methods=['POST'])
 def process_audio():
     try:
-        audio_file = request.files['audio']
-        # Save the uploaded file temporarily
-        temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
         audio_file.save(temp_file.name)
-        # Convert the file to PCM WAV format if necessary
         converted_file = NamedTemporaryFile(delete=False, suffix=".wav")
-        ffmpeg.input(temp_file.name).output(converted_file.name, acodec='pcm_s16le', ac=1, ar='16000').run(overwrite_output=True)
         recognizer = sr.Recognizer()
         with sr.AudioFile(converted_file.name) as source:
             audio_data = recognizer.record(source)
             command = recognizer.recognize_google(audio_data)
             response = process_command(command)
-        # Clean up temporary files
-        os.unlink(temp_file.name)
-        os.unlink(converted_file.name)
         return jsonify({"response": response})
     except Exception as e:
         return jsonify({"response": f"An error occurred: {str(e)}"})
 def process_command(command):
     """Process the user's voice command and return a response."""
-    global orders
     command = command.lower()
     if "menu" in command:
-        return "Our menu includes paneer butter masala, fried rice, and cold coffee."
     elif "order" in command:
-        return "Your order has been placed."
-    return "Sorry, I didn't understand your request."
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

 from flask import Flask, render_template_string, request, jsonify
+import speech_recognition as sr
 from tempfile import NamedTemporaryFile
 import os
+import ffmpeg
+import logging
+from werkzeug.exceptions import BadRequest
+# Initialize Flask App
 app = Flask(__name__)
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+# HTML Template for Frontend
 html_code = """
 <!DOCTYPE html>
 <html lang="en">
 <body>
     <h1>AI Dining Assistant</h1>
     <button class="mic-button" id="mic-button">🎤</button>
+    <div class="status" id="status">Press the mic button to start the conversation...</div>
     <div class="response" id="response" style="display: none;">Response will appear here...</div>
     <script>
         const micButton = document.getElementById('mic-button');
         const status = document.getElementById('status');
         const response = document.getElementById('response');
         let mediaRecorder;
         let audioChunks = [];
+        let isConversationActive = false;
+        micButton.addEventListener('click', () => {
+            if (!isConversationActive) {
+                isConversationActive = true;
+                startConversation();
+            }
         });
+        function startConversation() {
+            status.textContent = 'Listening...';
+            startListening();
+        }
+        function startListening() {
+            navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
+                mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
+                mediaRecorder.start();
+                audioChunks = [];
+                mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
+                mediaRecorder.onstop = async () => {
+                    const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+                    const formData = new FormData();
+                    formData.append('audio', audioBlob);
+                    status.textContent = 'Processing...';
+                    try {
+                        const result = await fetch('/process-audio', { method: 'POST', body: formData });
+                        const data = await result.json();
+                        response.textContent = data.response;
+                        response.style.display = 'block';
+                        const utterance = new SpeechSynthesisUtterance(data.response);
+                        speechSynthesis.speak(utterance);
+                        if (data.response.includes("Goodbye")) {
+                            status.textContent = 'Conversation ended. Press the mic button to start again.';
+                            isConversationActive = false;
+                        } else {
+                            status.textContent = 'Listening...';
+                            setTimeout(startListening, 1000); // Continue listening
+                        }
+                    } catch (error) {
+                        response.textContent = 'Error occurred. Please try again.';
+                        response.style.display = 'block';
+                        status.textContent = 'Press the mic button to restart the conversation.';
+                        isConversationActive = false;
+                    }
+                };
+                setTimeout(() => mediaRecorder.stop(), 5000); // Stop recording after 5 seconds
+            }).catch(() => {
+                status.textContent = 'Microphone access denied.';
+                isConversationActive = false;
+            });
+        }
     </script>
 </body>
 </html>
 @app.route('/process-audio', methods=['POST'])
 def process_audio():
     """Transcribe an uploaded audio clip and answer the spoken command.

     Reads the ``audio`` file from the multipart form, converts it with
     ffmpeg to mono 16 kHz PCM WAV, transcribes it with the Google
     recognizer from ``speech_recognition`` and passes the text to
     ``process_command``.

     Returns:
         A JSON response of the form ``{"response": <text>}``. Failures
         (missing/empty upload, conversion error, recognition error) are
         reported in that same shape so the frontend can always read
         ``data.response``.
     """
     # Bound before the ``try`` so the cleanup in ``finally`` never touches
     # an unbound name when validation fails before the files are created.
     temp_path = None
     converted_path = None
     try:
         # Validate audio file
         audio_file = request.files.get('audio')
         if not audio_file:
             raise BadRequest("No audio file provided.")

         # Only the path is needed: close the handle right away so no file
         # descriptor leaks and the path can be reopened by name everywhere.
         with NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
             temp_path = temp_file.name
         audio_file.save(temp_path)
         logging.info("Saved input audio to %s", temp_path)
         if os.path.getsize(temp_path) == 0:
             raise BadRequest("Uploaded audio file is empty.")

         # Convert audio to PCM WAV format
         with NamedTemporaryFile(delete=False, suffix=".wav") as converted_file:
             converted_path = converted_file.name
         try:
             ffmpeg.input(temp_path).output(
                 converted_path, acodec='pcm_s16le', ac=1, ar='16000'
             ).run(overwrite_output=True)
         except Exception as ffmpeg_error:
             logging.error("FFmpeg conversion error: %s", ffmpeg_error)
             return jsonify({"response": "Audio conversion failed. Please try again."})
         logging.info("Converted audio saved to %s", converted_path)

         # Recognize speech
         recognizer = sr.Recognizer()
         with sr.AudioFile(converted_path) as source:
             audio_data = recognizer.record(source)
         command = recognizer.recognize_google(audio_data)
         logging.info("Recognized command: %s", command)
         return jsonify({"response": process_command(command)})
     except BadRequest as br:
         logging.error("Bad request error: %s", br)
         return jsonify({"response": f"Bad Request: {str(br)}"})
     except sr.UnknownValueError:
         # Raised when no speech could be transcribed; its message is empty,
         # so the generic handler below would speak a blank error text.
         logging.warning("Speech was not understood")
         return jsonify({"response": "Sorry, I could not understand the audio. Please try again."})
     except Exception as e:
         logging.error("Error processing audio: %s", e)
         return jsonify({"response": f"An error occurred: {str(e)}"})
     finally:
         # Clean up temporary files; handle each path on its own so one
         # failed removal does not leave the other file behind.
         for path in (temp_path, converted_path):
             if path is None:
                 continue
             try:
                 if os.path.exists(path):
                     os.unlink(path)
             except OSError as cleanup_error:
                 logging.error("Error cleaning up files: %s", cleanup_error)
 def process_command(command):
     """Process the user's voice command and return a response.

     Args:
         command: Transcribed speech from the user. ``None`` is treated
             like an empty string.

     Returns:
         The assistant's reply. A reply containing "Goodbye" is what the
         frontend uses to end the conversation, so that branch must only
         fire on a real refusal.
     """
     import re  # local import keeps this block self-contained

     command = (command or "").lower()
     if "menu" in command:
         return (
             "Here is our menu: "
             "South Indian dishes include Idli, Dosa, Vada, Pongal, Biryani, and Sambar Rice. "
             "North Indian dishes include Butter Chicken, Paneer Butter Masala, Naan, Dal Makhani, Chole Bhature, and Rajma Chawal. "
             "What would you like to order?"
         )
     if "order" in command:
         return "Your order has been placed. Would you like anything else?"

     # Match "no"/"nothing" as whole words: a plain substring test also
     # fires on "know", "not", "another", "noodles", ... and would end the
     # conversation by mistake.
     words = set(re.findall(r"[a-z']+", command))
     if words & {"no", "nothing"}:
         return "Goodbye! Thank you for using AI Dining Assistant."
     return "Sorry, I didn't understand your request. Please ask about the menu or place an order."
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)