nagasurendra committed on
Commit
710a6ce
·
verified ·
1 Parent(s): 69a28fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -167
app.py CHANGED
@@ -1,16 +1,19 @@
1
  from flask import Flask, render_template_string, request, jsonify
2
- import speech_recognition as sr
3
  from tempfile import NamedTemporaryFile
4
- import os
5
  import ffmpeg
6
  from fuzzywuzzy import process
7
- import phonetics
8
  import logging
9
 
 
10
  app = Flask(__name__)
11
  logging.basicConfig(level=logging.INFO)
12
 
13
- # Global variables
 
 
 
14
  cart = {}
15
  menu_preferences = "all"
16
  prices = {
@@ -65,24 +68,18 @@ def process_audio():
65
  converted_file.name, acodec="pcm_s16le", ac=1, ar="16000"
66
  ).run(overwrite_output=True)
67
 
68
- recognizer = sr.Recognizer()
69
- recognizer.dynamic_energy_threshold = True
70
- recognizer.energy_threshold = 100 # Sensitive for low audio levels
 
71
 
72
- with sr.AudioFile(converted_file.name) as source:
73
- audio_data = recognizer.record(source)
74
- raw_command = recognizer.recognize_google(audio_data).lower()
75
- logging.info(f"Raw recognized command: {raw_command}")
76
 
77
- # Preprocess command
78
- all_menu_items = menus["all"]
79
- command = preprocess_command(raw_command, all_menu_items)
80
 
81
- # Pass preprocessed command to process_command
82
- response = process_command(command)
83
-
84
- except sr.UnknownValueError:
85
- response = "Sorry, I couldn't understand. Please try again."
86
  except Exception as e:
87
  response = f"An error occurred: {str(e)}"
88
  finally:
@@ -91,55 +88,34 @@ def process_audio():
91
 
92
  return jsonify({"response": response})
93
 
94
-
95
  def preprocess_command(command, menu_items):
96
  """
97
  Preprocess the user command:
98
  - Normalize speech for accents and speed using fuzzy matching.
99
- - Phonetically match menu items.
100
  """
101
- def phonetic_match(word, options):
102
- word_phonetic = phonetics.metaphone(word)
103
- for option in options:
104
- if phonetics.metaphone(option) == word_phonetic:
105
- return option
106
- return None
107
-
108
- # First, try fuzzy matching
109
  closest_match = process.extractOne(command, menu_items)
110
  if closest_match and closest_match[1] > 70: # Adjust fuzzy match threshold
111
  return closest_match[0]
112
-
113
- # Fallback to phonetic matching
114
- words = command.split()
115
- for word in words:
116
- match = phonetic_match(word, menu_items)
117
- if match:
118
- return match
119
-
120
  return command
121
 
122
-
123
  def process_command(command):
124
  global cart, menu_preferences
125
  command = command.lower()
126
 
127
- # Recognize menu preferences explicitly
128
  if menu_preferences == "all":
129
  if "non-vegetarian" in command:
130
  menu_preferences = "non-vegetarian"
131
- return "You have chosen the Non-Vegetarian menu. To view menu say menu"
132
  elif "vegetarian" in command and "non-vegetarian" not in command:
133
  menu_preferences = "vegetarian"
134
- return "You have chosen the Vegetarian menu. To view menu say menu"
135
  elif "guilt-free" in command:
136
  menu_preferences = "guilt-free"
137
- return "You have chosen the Guilt-Free menu. To view menu say menu"
138
  elif "all" in command:
139
  menu_preferences = "all"
140
- return "You have chosen the complete menu. To view menu say menu"
141
 
142
- # Filtered menu based on preference
143
  menu = menus.get(menu_preferences, menus["all"])
144
 
145
  if "menu" in command:
@@ -152,7 +128,6 @@ def process_command(command):
152
  return f"The price of {matched_item} is ${prices[matched_item]}."
153
  return "Sorry, I couldn't find that item in the menu."
154
  elif "remove" in command:
155
- # Extract the item name after "remove"
156
  item = command.replace("remove", "").strip()
157
  closest_match = process.extractOne(item, list(cart.keys()))
158
  if closest_match and closest_match[1] > 70:
@@ -169,12 +144,12 @@ def process_command(command):
169
  if closest_match and closest_match[1] > 70:
170
  matched_item = closest_match[0]
171
  cart[matched_item] = cart.get(matched_item, 0) + 1
172
- return f"{matched_item.capitalize()} added to your cart. Current cart: {dict(cart)}. To finalize say final order"
173
  return "Sorry, I couldn't recognize the item. Could you try again?"
174
  elif "final order" in command:
175
  if cart:
176
  total = sum(prices[item] * count for item, count in cart.items())
177
- response = f"Your final order is: {', '.join(f'{item} x{count}' for item, count in cart.items())}. Your total bill is ${total}. Thank you for ordering! To exist this conversation say nothing or good bye!"
178
  cart.clear()
179
  return response
180
  return "Your cart is empty. Please add items to your cart first."
@@ -184,138 +159,26 @@ def process_command(command):
184
  return "Goodbye! Thank you for using AI Dining Assistant."
185
  return "Sorry, I couldn't understand that. Please try again."
186
 
187
-
188
- html_code = """
189
- <!DOCTYPE html>
190
  <html lang="en">
191
  <head>
192
  <meta charset="UTF-8">
193
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
194
  <title>AI Dining Assistant</title>
195
- <style>
196
- body {
197
- display: flex;
198
- flex-direction: column;
199
- align-items: center;
200
- justify-content: center;
201
- min-height: 100vh;
202
- margin: 0;
203
- font-family: Arial, sans-serif;
204
- background-color: #f4f4f9;
205
- }
206
- h1 {
207
- color: #333;
208
- }
209
- .mic-button {
210
- font-size: 2rem;
211
- padding: 1rem 2rem;
212
- color: white;
213
- background-color: #007bff;
214
- border: none;
215
- border-radius: 50px;
216
- cursor: pointer;
217
- transition: background-color 0.3s;
218
- }
219
- .mic-button:hover {
220
- background-color: #0056b3;
221
- }
222
- .status, .response {
223
- margin-top: 1rem;
224
- text-align: center;
225
- color: #555;
226
- font-size: 1.2rem;
227
- }
228
- .response {
229
- background-color: #e8e8ff;
230
- padding: 1rem;
231
- border-radius: 10px;
232
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
233
- display: none;
234
- }
235
- </style>
236
  </head>
237
  <body>
238
  <h1>AI Dining Assistant</h1>
239
- <button class="mic-button" id="mic-button">🎤</button>
240
- <div class="status" id="status">Press the mic button to start...</div>
241
- <div class="response" id="response">Response will appear here...</div>
242
  <script>
243
  const micButton = document.getElementById('mic-button');
244
- const status = document.getElementById('status');
245
- const response = document.getElementById('response');
246
- let mediaRecorder;
247
- let audioChunks = [];
248
- let isConversationActive = false;
249
  micButton.addEventListener('click', () => {
250
- if (!isConversationActive) {
251
- isConversationActive = true;
252
- startConversation();
253
- }
254
  });
255
- function startConversation() {
256
- const utterance = new SpeechSynthesisUtterance('Please choose your preference: All, Vegetarian, Non-Vegetarian, or Guilt-Free.');
257
- speechSynthesis.speak(utterance);
258
- utterance.onend = () => {
259
- status.textContent = 'Listening...';
260
- startListening();
261
- };
262
- }
263
- function startListening() {
264
- navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
265
- mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
266
- mediaRecorder.start();
267
- audioChunks = [];
268
- mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
269
- mediaRecorder.onstop = async () => {
270
- const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
271
- const formData = new FormData();
272
- formData.append('audio', audioBlob);
273
- status.textContent = 'Processing...';
274
- try {
275
- const result = await fetch('/process-audio', { method: 'POST', body: formData });
276
- const data = await result.json();
277
- response.textContent = data.response;
278
- response.style.display = 'block';
279
- const utterance = new SpeechSynthesisUtterance(data.response);
280
- speechSynthesis.speak(utterance);
281
- utterance.onend = () => {
282
- console.log("Speech synthesis completed.");
283
- if (data.response.includes("Goodbye")) {
284
- status.textContent = 'Conversation ended. Press the mic button to start again.';
285
- isConversationActive = false;
286
- fetch('/reset-cart'); // Reset the cart dynamically on end
287
- } else if (data.response.includes("Your order is complete")) {
288
- status.textContent = 'Order complete. Thank you for using AI Dining Assistant.';
289
- isConversationActive = false;
290
- fetch('/reset-cart'); // Reset the cart after final order
291
- } else {
292
- status.textContent = 'Listening...';
293
- setTimeout(() => {
294
- startListening();
295
- }, 100);
296
- }
297
- };
298
- utterance.onerror = (e) => {
299
- console.error("Speech synthesis error:", e.error);
300
- status.textContent = 'Error with speech output.';
301
- isConversationActive = false;
302
- };
303
- } catch (error) {
304
- response.textContent = 'Sorry, I could not understand. Please try again.';
305
- response.style.display = 'block';
306
- status.textContent = 'Press the mic button to restart the conversation.';
307
- isConversationActive = false;
308
- }
309
- };
310
- setTimeout(() => mediaRecorder.stop(), 5000);
311
- }).catch(() => {
312
- status.textContent = 'Microphone access denied.';
313
- isConversationActive = false;
314
- });
315
- }
316
  </script>
317
  </body>
318
- </html>
319
- """
320
  if __name__ == "__main__":
321
- app.run(host="0.0.0.0", port=7860)
 
1
  from flask import Flask, render_template_string, request, jsonify
 
2
  from tempfile import NamedTemporaryFile
3
+ import whisper
4
  import ffmpeg
5
  from fuzzywuzzy import process
6
+ import os
7
  import logging
8
 
9
+ # Flask app
10
  app = Flask(__name__)
11
  logging.basicConfig(level=logging.INFO)
12
 
13
+ # Whisper Model
14
+ model = whisper.load_model("base") # "base" is a small, fast checkpoint; a larger one such as "large" is more accurate but slower
15
+
16
+ # Global Variables
17
  cart = {}
18
  menu_preferences = "all"
19
  prices = {
 
68
  converted_file.name, acodec="pcm_s16le", ac=1, ar="16000"
69
  ).run(overwrite_output=True)
70
 
71
+ # Use Whisper for transcription
72
+ result = model.transcribe(converted_file.name)
73
+ raw_command = result["text"].lower()
74
+ logging.info(f"Raw recognized command: {raw_command}")
75
 
76
+ # Preprocess command
77
+ all_menu_items = menus["all"]
78
+ command = preprocess_command(raw_command, all_menu_items)
 
79
 
80
+ # Process command
81
+ response = process_command(command)
 
82
 
 
 
 
 
 
83
  except Exception as e:
84
  response = f"An error occurred: {str(e)}"
85
  finally:
 
88
 
89
  return jsonify({"response": response})
90
 
 
91
  def preprocess_command(command, menu_items):
92
  """
93
  Preprocess the user command:
94
  - Normalize speech for accents and speed using fuzzy matching.
 
95
  """
 
 
 
 
 
 
 
 
96
  closest_match = process.extractOne(command, menu_items)
97
  if closest_match and closest_match[1] > 70: # Adjust fuzzy match threshold
98
  return closest_match[0]
 
 
 
 
 
 
 
 
99
  return command
100
 
 
101
  def process_command(command):
102
  global cart, menu_preferences
103
  command = command.lower()
104
 
 
105
  if menu_preferences == "all":
106
  if "non-vegetarian" in command:
107
  menu_preferences = "non-vegetarian"
108
+ return "You have chosen the Non-Vegetarian menu. To view menu say menu."
109
  elif "vegetarian" in command and "non-vegetarian" not in command:
110
  menu_preferences = "vegetarian"
111
+ return "You have chosen the Vegetarian menu. To view menu say menu."
112
  elif "guilt-free" in command:
113
  menu_preferences = "guilt-free"
114
+ return "You have chosen the Guilt-Free menu. To view menu say menu."
115
  elif "all" in command:
116
  menu_preferences = "all"
117
+ return "You have chosen the complete menu. To view menu say menu."
118
 
 
119
  menu = menus.get(menu_preferences, menus["all"])
120
 
121
  if "menu" in command:
 
128
  return f"The price of {matched_item} is ${prices[matched_item]}."
129
  return "Sorry, I couldn't find that item in the menu."
130
  elif "remove" in command:
 
131
  item = command.replace("remove", "").strip()
132
  closest_match = process.extractOne(item, list(cart.keys()))
133
  if closest_match and closest_match[1] > 70:
 
144
  if closest_match and closest_match[1] > 70:
145
  matched_item = closest_match[0]
146
  cart[matched_item] = cart.get(matched_item, 0) + 1
147
+ return f"{matched_item.capitalize()} added to your cart. Current cart: {dict(cart)}. To finalize say final order."
148
  return "Sorry, I couldn't recognize the item. Could you try again?"
149
  elif "final order" in command:
150
  if cart:
151
  total = sum(prices[item] * count for item, count in cart.items())
152
+ response = f"Your final order is: {', '.join(f'{item} x{count}' for item, count in cart.items())}. Your total bill is ${total}. Thank you for ordering! To exit this conversation say nothing or goodbye."
153
  cart.clear()
154
  return response
155
  return "Your cart is empty. Please add items to your cart first."
 
159
  return "Goodbye! Thank you for using AI Dining Assistant."
160
  return "Sorry, I couldn't understand that. Please try again."
161
 
162
+ html_code = """<!DOCTYPE html>
 
 
163
  <html lang="en">
164
  <head>
165
  <meta charset="UTF-8">
166
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
167
  <title>AI Dining Assistant</title>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  </head>
169
  <body>
170
  <h1>AI Dining Assistant</h1>
171
+ <button id="mic-button">🎤 Speak</button>
172
+ <div id="response"></div>
 
173
  <script>
174
  const micButton = document.getElementById('mic-button');
175
+ const responseDiv = document.getElementById('response');
 
 
 
 
176
  micButton.addEventListener('click', () => {
177
+ alert('Audio capture functionality not implemented in this sample HTML.');
 
 
 
178
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  </script>
180
  </body>
181
+ </html>"""
182
+
183
  if __name__ == "__main__":
184
+ app.run(host="0.0.0.0", port=7860)