Spaces:

nagasurendra
/

Voice_Menu_Ordering

Sleeping

App Files Community

nagasurendra committed on Dec 28, 2024

Commit

cf344c7

verified ·

1 Parent(s): 49e8f5c

Update app.py

Browse files

Files changed (1)

app.py +81 -112

app.py CHANGED Viewed

@@ -1,128 +1,97 @@
 import gradio as gr
-import edge_tts
-import asyncio
-import tempfile
-import numpy as np
-from pydub import AudioSegment
-import torch
-import sentencepiece as spm
-import onnxruntime as ort
-from huggingface_hub import hf_hub_download
-# Dynamic Menu Items
-MENU = {
-    "Pizza": 10.99,
-    "Burger": 6.99,
-    "Pasta": 8.49,
-    "Salad": 5.49,
-    "Soda": 1.99,
-    "Coffee": 2.99
 }
-cart = []  # To store cart items
-# Speech Recognition Model Configuration
-model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
-sample_rate = 16000
-# Download preprocessor, encoder, and tokenizer
-preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
-encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
-tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
-async def text_to_speech(text):
-    communicate = edge_tts.Communicate(text)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-        tmp_path = tmp_file.name
-        await communicate.save(tmp_path)
-    return tmp_path
-def resample(audio_fp32, sr):
-    return soxr.resample(audio_fp32, sr, sample_rate)
-def to_float32(audio_buffer):
-    return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
-def transcribe(audio_path):
-    audio_file = AudioSegment.from_file(audio_path)
-    sr = audio_file.frame_rate
-    audio_buffer = np.array(audio_file.get_array_of_samples())
-    audio_fp32 = to_float32(audio_buffer)
-    audio_16k = resample(audio_fp32, sr)
-    input_signal = torch.tensor(audio_16k).unsqueeze(0)
-    length = torch.tensor(len(audio_16k)).unsqueeze(0)
-    processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
-    logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
-    blank_id = tokenizer.vocab_size()
-    decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
-    text = tokenizer.decode_ids(decoded_prediction)
-    return text
-def generate_menu():
-    menu_text = "Here is our menu:\n"
-    for item, price in MENU.items():
-        menu_text += f"{item}: ${price:.2f}\n"
-    menu_text += "What would you like to order?"
-    return menu_text
-def handle_cart(command):
-    global cart
-    response = ""
-    # Check for menu-related commands
-    if "menu" in command.lower():
-        response = generate_menu()
-    # Check for add-to-cart commands
-    else:
-        for item in MENU.keys():
-            if item.lower() in command.lower():
                 cart.append(item)
-                response = f"{item} has been added to your cart."
-                break
-    # If user asks for cart
-    if "cart" in command.lower():
-        if cart:
-            response = "Your cart contains:\n" + ", ".join(cart)
         else:
-            response = "Your cart is empty."
-    # If user confirms order
-    if "submit" in command.lower() or "done" in command.lower():
-        if cart:
-            response = "Your final order is:\n" + ", ".join(cart) + ". Thank you for your order!"
-            cart = []  # Clear the cart
         else:
-            response = "Your cart is empty. Add some items before submitting."
-    return response
-async def respond(audio):
-    try:
-        user_command = transcribe(audio)
-        reply = handle_cart(user_command)
-        reply_audio_path = await text_to_speech(reply)
-        return user_command, reply, reply_audio_path
-    except Exception as e:
-        return "Error: Could not transcribe audio.", "Error: Could not process your request.", None
-with gr.Blocks() as demo:
-    with gr.Row():
-        audio_input = gr.Audio(label="Speak Here", type="filepath")
-        submit = gr.Button("Submit")
-    with gr.Row():
-        transcribed_text = gr.Textbox(label="Transcribed Text")
-        response_text = gr.Textbox(label="GPT Response")
-        response_audio = gr.Audio(label="Response Audio")
-    submit.click(fn=respond, inputs=[audio_input], outputs=[transcribed_text, response_text, response_audio])
 if __name__ == "__main__":
-    demo.queue().launch()

 import gradio as gr
+from gtts import gTTS
+import os
+import speech_recognition as sr
+# Initialize recognizer
+recognizer = sr.Recognizer()
+# Menu items
+menu_items = {
+    "biryani": ["Chicken Biryani", "Mutton Biryani", "Vegetable Biryani", "Egg Biryani"],
+    "starters": ["Chicken Tikka", "Paneer Tikka", "Fish Fry", "Veg Manchurian"],
+    "drinks": ["Coke", "Pepsi", "Lemonade", "Mango Juice", "Water"]
 }
+cart = []
+# Text-to-Speech Function
+def text_to_speech(text):
+    """Convert text to speech and provide audio file."""
+    tts = gTTS(text=text, lang='en')
+    file_path = "response.mp3"
+    tts.save(file_path)
+    return file_path
+# Read Menu Function
+def read_menu():
+    """Generate the menu text and read it aloud."""
+    menu_text = "Here is the menu. Starting with Biryani options: "
+    for item in menu_items["biryani"]:
+        menu_text += item + ". "
+    menu_text += "Now the Starters: "
+    for item in menu_items["starters"]:
+        menu_text += item + ". "
+    menu_text += "Finally, Drinks: "
+    for item in menu_items["drinks"]:
+        menu_text += item + ". "
+    return menu_text, text_to_speech(menu_text)
+# Process Voice Command
+def process_command(audio_path):
+    """Process the user's voice command."""
+    try:
+        with sr.AudioFile(audio_path) as source:
+            audio_data = recognizer.record(source)
+            command = recognizer.recognize_google(audio_data).lower()
+    except Exception as e:
+        error_text = "Sorry, I could not process the audio."
+        return "Error", text_to_speech(error_text)
+    if "menu" in command:
+        menu_text, menu_audio = read_menu()
+        return menu_text, menu_audio
+    for category, items in menu_items.items():
+        for item in items:
+            if item.lower() in command:
                 cart.append(item)
+                response_text = f"{item} has been added to your cart."
+                return response_text, text_to_speech(response_text)
+    if "cart" in command:
+        if not cart:
+            response_text = "Your cart is empty."
         else:
+            response_text = "Your cart contains: " + ", ".join(cart)
+        return response_text, text_to_speech(response_text)
+    if "submit" in command or "done" in command:
+        if not cart:
+            response_text = "Your cart is empty. Add some items before submitting."
         else:
+            response_text = "Your final order is: " + ", ".join(cart) + ". Thank you for your order!"
+            cart.clear()
+        return response_text, text_to_speech(response_text)
+    error_text = "Sorry, I couldn't understand your request."
+    return error_text, text_to_speech(error_text)
+# Gradio App
+def app():
+    """Create the Gradio interface."""
+    with gr.Blocks() as demo:
+        gr.Markdown("# Voice-Activated Restaurant Menu System")
+        gr.Markdown("Speak your command to interact with the menu system dynamically.")
+        with gr.Row():
+            voice_input = gr.Audio(type="filepath", label="Speak Your Command")
+            transcribed_text = gr.Textbox(label="Transcribed Command")
+            response_text = gr.Textbox(label="Response Text")
+            audio_output = gr.Audio(label="Audio Response")
+        voice_input.change(fn=process_command, inputs=voice_input, outputs=[response_text, audio_output])
+    return demo
 if __name__ == "__main__":
+    app().launch()