nagasurendra committed on
Commit
af3db37
·
verified ·
1 Parent(s): 3480c62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -34
app.py CHANGED
@@ -1,19 +1,17 @@
1
  from flask import Flask, render_template_string, request, jsonify
 
2
  from tempfile import NamedTemporaryFile
3
- import whisper
4
- import ffmpeg
5
- from fuzzywuzzy import process
6
  import os
 
 
 
 
7
  import logging
8
 
9
- # Flask app
10
  app = Flask(__name__)
11
  logging.basicConfig(level=logging.INFO)
12
 
13
- # Whisper Model
14
- model = whisper.load_model("base") # Use 'base' or 'large' for better accuracy
15
-
16
- # Global Variables
17
  cart = {}
18
  menu_preferences = "all"
19
  prices = {
@@ -65,19 +63,39 @@ def process_audio():
65
 
66
  converted_file = NamedTemporaryFile(delete=False, suffix=".wav")
67
  ffmpeg.input(temp_file.name).output(
68
- converted_file.name, acodec="pcm_s16le", ac=1, ar="16000"
69
  ).run(overwrite_output=True)
70
 
71
- # Use Whisper for transcription
72
- result = model.transcribe(converted_file.name)
73
- raw_command = result["text"].lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  logging.info(f"Raw recognized command: {raw_command}")
75
 
76
  # Preprocess command
77
  all_menu_items = menus["all"]
78
  command = preprocess_command(raw_command, all_menu_items)
79
 
80
- # Process command
81
  response = process_command(command)
82
 
83
  except Exception as e:
@@ -92,45 +110,65 @@ def preprocess_command(command, menu_items):
92
  """
93
  Preprocess the user command:
94
  - Normalize speech for accents and speed using fuzzy matching.
 
95
  """
96
- closest_match = process.extractOne(command, menu_items)
97
- if closest_match and closest_match[1] > 70: # Adjust fuzzy match threshold
 
 
 
 
 
 
 
 
98
  return closest_match[0]
 
 
 
 
 
 
 
 
99
  return command
100
 
101
  def process_command(command):
102
  global cart, menu_preferences
103
  command = command.lower()
104
 
 
105
  if menu_preferences == "all":
106
  if "non-vegetarian" in command:
107
  menu_preferences = "non-vegetarian"
108
- return "You have chosen the Non-Vegetarian menu. To view menu say menu."
109
  elif "vegetarian" in command and "non-vegetarian" not in command:
110
  menu_preferences = "vegetarian"
111
- return "You have chosen the Vegetarian menu. To view menu say menu."
112
  elif "guilt-free" in command:
113
  menu_preferences = "guilt-free"
114
- return "You have chosen the Guilt-Free menu. To view menu say menu."
115
  elif "all" in command:
116
  menu_preferences = "all"
117
- return "You have chosen the complete menu. To view menu say menu."
118
 
 
119
  menu = menus.get(menu_preferences, menus["all"])
120
 
121
  if "menu" in command:
122
  return f"Here is your menu: {', '.join(menu)}. To select an item say item name."
123
  elif "price of" in command:
124
  item = command.replace("price of", "").strip()
125
- closest_match = process.extractOne(item, prices.keys())
126
- if closest_match and closest_match[1] > 70:
127
  matched_item = closest_match[0]
128
  return f"The price of {matched_item} is ${prices[matched_item]}."
129
  return "Sorry, I couldn't find that item in the menu."
130
  elif "remove" in command:
 
131
  item = command.replace("remove", "").strip()
132
- closest_match = process.extractOne(item, list(cart.keys()))
133
- if closest_match and closest_match[1] > 70:
134
  matched_item = closest_match[0]
135
  if cart[matched_item] > 1:
136
  cart[matched_item] -= 1
@@ -140,16 +178,16 @@ def process_command(command):
140
  return f"{matched_item.capitalize()} has been removed from your cart. Current cart: {dict(cart)}."
141
  return "Sorry, that item is not in your cart."
142
  elif any(item in command for item in menu):
143
- closest_match = process.extractOne(command, menu)
144
- if closest_match and closest_match[1] > 70:
145
  matched_item = closest_match[0]
146
  cart[matched_item] = cart.get(matched_item, 0) + 1
147
- return f"{matched_item.capitalize()} added to your cart. Current cart: {dict(cart)}. To finalize say final order."
148
  return "Sorry, I couldn't recognize the item. Could you try again?"
149
  elif "final order" in command:
150
  if cart:
151
  total = sum(prices[item] * count for item, count in cart.items())
152
- response = f"Your final order is: {', '.join(f'{item} x{count}' for item, count in cart.items())}. Your total bill is ${total}. Thank you for ordering! To exit this conversation say nothing or goodbye."
153
  cart.clear()
154
  return response
155
  return "Your cart is empty. Please add items to your cart first."
@@ -159,26 +197,137 @@ def process_command(command):
159
  return "Goodbye! Thank you for using AI Dining Assistant."
160
  return "Sorry, I couldn't understand that. Please try again."
161
 
162
- html_code = """<!DOCTYPE html>
 
163
  <html lang="en">
164
  <head>
165
  <meta charset="UTF-8">
166
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
167
  <title>AI Dining Assistant</title>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  </head>
169
  <body>
170
  <h1>AI Dining Assistant</h1>
171
- <button id="mic-button">🎤 Speak</button>
172
- <div id="response"></div>
 
173
  <script>
174
  const micButton = document.getElementById('mic-button');
175
- const responseDiv = document.getElementById('response');
 
 
 
 
176
  micButton.addEventListener('click', () => {
177
- alert('Audio capture functionality not implemented in this sample HTML.');
 
 
 
178
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  </script>
180
  </body>
181
- </html>"""
182
-
183
  if __name__ == "__main__":
184
  app.run(host="0.0.0.0", port=7860)
 
1
  from flask import Flask, render_template_string, request, jsonify
2
+ import speech_recognition as sr
3
  from tempfile import NamedTemporaryFile
 
 
 
4
  import os
5
+ import ffmpeg
6
+ from fuzzywuzzy import process, fuzz
7
+ import phonetics
8
+ from metaphone import doublemetaphone
9
  import logging
10
 
 
11
  app = Flask(__name__)
12
  logging.basicConfig(level=logging.INFO)
13
 
14
+ # Global variables
 
 
 
15
  cart = {}
16
  menu_preferences = "all"
17
  prices = {
 
63
 
64
  converted_file = NamedTemporaryFile(delete=False, suffix=".wav")
65
  ffmpeg.input(temp_file.name).output(
66
+ converted_file.name, acodec="pcm_s16le", ac=1, ar="16000", af="highpass=f=200,lowpass=f=3000,acompressor"
67
  ).run(overwrite_output=True)
68
 
69
+ recognizer = sr.Recognizer()
70
+ recognizer.dynamic_energy_threshold = True
71
+ recognizer.energy_threshold = 100
72
+ recognizer.pause_threshold = 1.0
73
+ recognizer.phrase_threshold = 0.3
74
+ recognizer.non_speaking_duration = 0.2
75
+
76
+ with sr.AudioFile(converted_file.name) as source:
77
+ recognizer.adjust_for_ambient_noise(source, duration=1)
78
+ audio_data = recognizer.record(source)
79
+
80
+ raw_command = None
81
+ for _ in range(2): # Retry recognition twice if needed
82
+ try:
83
+ raw_command = recognizer.recognize_google(audio_data).lower()
84
+ break
85
+ except sr.UnknownValueError:
86
+ response = "Sorry, I couldn't catch that. Could you repeat?"
87
+ logging.info(response)
88
+
89
+ if raw_command is None:
90
+ return jsonify({"response": "Sorry, I couldn't understand. Please try again."})
91
+
92
  logging.info(f"Raw recognized command: {raw_command}")
93
 
94
  # Preprocess command
95
  all_menu_items = menus["all"]
96
  command = preprocess_command(raw_command, all_menu_items)
97
 
98
+ # Pass preprocessed command to process_command
99
  response = process_command(command)
100
 
101
  except Exception as e:
 
110
  """
111
  Preprocess the user command:
112
  - Normalize speech for accents and speed using fuzzy matching.
113
+ - Phonetically match menu items.
114
  """
115
+ def phonetic_match(word, options):
116
+ word_phonetic = doublemetaphone(word)[0]
117
+ for option in options:
118
+ if doublemetaphone(option)[0] == word_phonetic:
119
+ return option
120
+ return None
121
+
122
+ # First, try fuzzy matching
123
+ closest_match = process.extractOne(command, menu_items, scorer=fuzz.token_set_ratio)
124
+ if closest_match and closest_match[1] > 60:
125
  return closest_match[0]
126
+
127
+ # Fallback to phonetic matching
128
+ words = command.split()
129
+ for word in words:
130
+ match = phonetic_match(word, menu_items)
131
+ if match:
132
+ return match
133
+
134
  return command
135
 
136
  def process_command(command):
137
  global cart, menu_preferences
138
  command = command.lower()
139
 
140
+ # Recognize menu preferences explicitly
141
  if menu_preferences == "all":
142
  if "non-vegetarian" in command:
143
  menu_preferences = "non-vegetarian"
144
+ return "You have chosen the Non-Vegetarian menu. To view menu say menu"
145
  elif "vegetarian" in command and "non-vegetarian" not in command:
146
  menu_preferences = "vegetarian"
147
+ return "You have chosen the Vegetarian menu. To view menu say menu"
148
  elif "guilt-free" in command:
149
  menu_preferences = "guilt-free"
150
+ return "You have chosen the Guilt-Free menu. To view menu say menu"
151
  elif "all" in command:
152
  menu_preferences = "all"
153
+ return "You have chosen the complete menu. To view menu say menu"
154
 
155
+ # Filtered menu based on preference
156
  menu = menus.get(menu_preferences, menus["all"])
157
 
158
  if "menu" in command:
159
  return f"Here is your menu: {', '.join(menu)}. To select an item say item name."
160
  elif "price of" in command:
161
  item = command.replace("price of", "").strip()
162
+ closest_match = process.extractOne(item, prices.keys(), scorer=fuzz.token_set_ratio)
163
+ if closest_match and closest_match[1] > 60:
164
  matched_item = closest_match[0]
165
  return f"The price of {matched_item} is ${prices[matched_item]}."
166
  return "Sorry, I couldn't find that item in the menu."
167
  elif "remove" in command:
168
+ # Extract the item name after "remove"
169
  item = command.replace("remove", "").strip()
170
+ closest_match = process.extractOne(item, list(cart.keys()), scorer=fuzz.token_set_ratio)
171
+ if closest_match and closest_match[1] > 60:
172
  matched_item = closest_match[0]
173
  if cart[matched_item] > 1:
174
  cart[matched_item] -= 1
 
178
  return f"{matched_item.capitalize()} has been removed from your cart. Current cart: {dict(cart)}."
179
  return "Sorry, that item is not in your cart."
180
  elif any(item in command for item in menu):
181
+ closest_match = process.extractOne(command, menu, scorer=fuzz.token_set_ratio)
182
+ if closest_match and closest_match[1] > 60:
183
  matched_item = closest_match[0]
184
  cart[matched_item] = cart.get(matched_item, 0) + 1
185
+ return f"{matched_item.capitalize()} added to your cart. Current cart: {dict(cart)}. To finalize say final order"
186
  return "Sorry, I couldn't recognize the item. Could you try again?"
187
  elif "final order" in command:
188
  if cart:
189
  total = sum(prices[item] * count for item, count in cart.items())
190
+ response = f"Your final order is: {', '.join(f'{item} x{count}' for item, count in cart.items())}. Your total bill is ${total}. Thank you for ordering! To exit this conversation say nothing or goodbye!"
191
  cart.clear()
192
  return response
193
  return "Your cart is empty. Please add items to your cart first."
 
197
  return "Goodbye! Thank you for using AI Dining Assistant."
198
  return "Sorry, I couldn't understand that. Please try again."
199
 
200
+ html_code = """
201
+ <!DOCTYPE html>
202
  <html lang="en">
203
  <head>
204
  <meta charset="UTF-8">
205
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
206
  <title>AI Dining Assistant</title>
207
+ <style>
208
+ body {
209
+ display: flex;
210
+ flex-direction: column;
211
+ align-items: center;
212
+ justify-content: center;
213
+ min-height: 100vh;
214
+ margin: 0;
215
+ font-family: Arial, sans-serif;
216
+ background-color: #f4f4f9;
217
+ }
218
+ h1 {
219
+ color: #333;
220
+ }
221
+ .mic-button {
222
+ font-size: 2rem;
223
+ padding: 1rem 2rem;
224
+ color: white;
225
+ background-color: #007bff;
226
+ border: none;
227
+ border-radius: 50px;
228
+ cursor: pointer;
229
+ transition: background-color 0.3s;
230
+ }
231
+ .mic-button:hover {
232
+ background-color: #0056b3;
233
+ }
234
+ .status, .response {
235
+ margin-top: 1rem;
236
+ text-align: center;
237
+ color: #555;
238
+ font-size: 1.2rem;
239
+ }
240
+ .response {
241
+ background-color: #e8e8ff;
242
+ padding: 1rem;
243
+ border-radius: 10px;
244
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
245
+ display: none;
246
+ }
247
+ </style>
248
  </head>
249
  <body>
250
  <h1>AI Dining Assistant</h1>
251
+ <button class="mic-button" id="mic-button">🎤</button>
252
+ <div class="status" id="status">Press the mic button to start...</div>
253
+ <div class="response" id="response">Response will appear here...</div>
254
  <script>
255
  const micButton = document.getElementById('mic-button');
256
+ const status = document.getElementById('status');
257
+ const response = document.getElementById('response');
258
+ let mediaRecorder;
259
+ let audioChunks = [];
260
+ let isConversationActive = false;
261
  micButton.addEventListener('click', () => {
262
+ if (!isConversationActive) {
263
+ isConversationActive = true;
264
+ startConversation();
265
+ }
266
  });
267
+ function startConversation() {
268
+ const utterance = new SpeechSynthesisUtterance('Please choose your preference: All, Vegetarian, Non-Vegetarian, or Guilt-Free.');
269
+ speechSynthesis.speak(utterance);
270
+ utterance.onend = () => {
271
+ status.textContent = 'Listening...';
272
+ startListening();
273
+ };
274
+ }
275
+ function startListening() {
276
+ navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
277
+ mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
278
+ mediaRecorder.start();
279
+ audioChunks = [];
280
+ mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
281
+ mediaRecorder.onstop = async () => {
282
+ const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
283
+ const formData = new FormData();
284
+ formData.append('audio', audioBlob);
285
+ status.textContent = 'Processing...';
286
+ try {
287
+ const result = await fetch('/process-audio', { method: 'POST', body: formData });
288
+ const data = await result.json();
289
+ response.textContent = data.response;
290
+ response.style.display = 'block';
291
+ const utterance = new SpeechSynthesisUtterance(data.response);
292
+ speechSynthesis.speak(utterance);
293
+ utterance.onend = () => {
294
+ console.log("Speech synthesis completed.");
295
+ if (data.response.includes("Goodbye")) {
296
+ status.textContent = 'Conversation ended. Press the mic button to start again.';
297
+ isConversationActive = false;
298
+ fetch('/reset-cart'); // Reset the cart dynamically on end
299
+ } else if (data.response.includes("Your order is complete")) {
300
+ status.textContent = 'Order complete. Thank you for using AI Dining Assistant.';
301
+ isConversationActive = false;
302
+ fetch('/reset-cart'); // Reset the cart after final order
303
+ } else {
304
+ status.textContent = 'Listening...';
305
+ setTimeout(() => {
306
+ startListening();
307
+ }, 100);
308
+ }
309
+ };
310
+ utterance.onerror = (e) => {
311
+ console.error("Speech synthesis error:", e.error);
312
+ status.textContent = 'Error with speech output.';
313
+ isConversationActive = false;
314
+ };
315
+ } catch (error) {
316
+ response.textContent = 'Sorry, I could not understand. Please try again.';
317
+ response.style.display = 'block';
318
+ status.textContent = 'Press the mic button to restart the conversation.';
319
+ isConversationActive = false;
320
+ }
321
+ };
322
+ setTimeout(() => mediaRecorder.stop(), 5000);
323
+ }).catch(() => {
324
+ status.textContent = 'Microphone access denied.';
325
+ isConversationActive = false;
326
+ });
327
+ }
328
  </script>
329
  </body>
330
+ </html>
331
+ """
332
  if __name__ == "__main__":
333
  app.run(host="0.0.0.0", port=7860)