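# Voice-driven menu assistant (Gradio app): the user speaks a dietary
# preference, the recording is transcribed with an ONNX Citrinet model,
# the Excel menu is filtered accordingly, and edge-tts reads the reply back.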
import gradio as gr
import pandas as pd
import edge_tts
import asyncio
import tempfile
import numpy as np
import soxr
from pydub import AudioSegment
import torch
import sentencepiece as spm
import onnxruntime as ort
from huggingface_hub import hf_hub_download
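# menu.xlsx is expected to sit next to this file and to provide at least the
# columns "Dish Name", "Price ($)", "Description" and "Ingredients",
# since those are the fields referenced below.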
# Load Menu Data
def load_menu():
    menu_file = "menu.xlsx"
    try:
        return pd.read_excel(menu_file)
    except Exception as e:
        raise ValueError(f"Error loading menu file: {e}")
# Filter Menu Items
def filter_menu(preference):
    menu_data = load_menu()
    if preference == "Halal/Non-Veg":
        filtered_data = menu_data[menu_data["Ingredients"].str.contains("Chicken|Mutton|Fish|Prawns|Goat", case=False, na=False)]
    elif preference == "Vegetarian":
        filtered_data = menu_data[~menu_data["Ingredients"].str.contains("Chicken|Mutton|Fish|Prawns|Goat", case=False, na=False)]
    elif preference == "Guilt-Free":
        # Keep dishes whose description lists 0-10 g of fat
        filtered_data = menu_data[menu_data["Description"].str.contains(r"Fat: ([0-9]|10)g", case=False, na=False)]
    else:
        filtered_data = menu_data

    # Prepare dynamic HTML for the menu
    menu_html = ""
    for _, item in filtered_data.iterrows():
        menu_html += f"""
        <div>
            <h3>{item['Dish Name']}</h3>
            <p>Price: ${item['Price ($)']}</p>
            <p>Description: {item['Description']}</p>
        </div>
        """
    return menu_html
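# For example, filter_menu("Vegetarian") returns an HTML block containing only
# the dishes whose Ingredients column mentions none of the meat keywords above.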
# Speech Recognition Model Configuration
model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
sample_rate = 16000

preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
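# Transcription pipeline: the TorchScript preprocessor turns the waveform into
# spectrogram features, the ONNX encoder maps those to per-frame token logits,
# and greedy CTC decoding with the SentencePiece tokenizer (blank id = vocab
# size) produces the final text.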
async def respond(audio_path, preference):
    # The change event also fires when the recording is cleared; skip empty input
    if audio_path is None:
        return None, filter_menu(preference)

    # Transcribe audio to text
    transcription = transcribe(audio_path)

    # Voice-based interaction logic
    if "vegetarian" in transcription.lower():
        preference = "Vegetarian"
    elif "non-veg" in transcription.lower() or "halal" in transcription.lower():
        preference = "Halal/Non-Veg"
    elif "guilt-free" in transcription.lower():
        preference = "Guilt-Free"
    elif "menu details" in transcription.lower():
        preference = "All"

    # Filter menu based on preference
    menu_html = filter_menu(preference)

    # Text-to-Speech Response (uses edge-tts's default voice; pass a voice name
    # as the second argument of Communicate to change it)
    reply = f"Here are some {preference} dishes available." if preference != "All" else "Here are all the menu details available."
    communicate = edge_tts.Communicate(reply)
    # edge-tts writes MP3 audio by default, so give the temp file a matching suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    return tmp_path, menu_html
def transcribe(audio_path):
    # Decode the recording and downmix to mono so the sample buffer is 1-D
    audio_file = AudioSegment.from_file(audio_path).set_channels(1)
    sr = audio_file.frame_rate
    audio_buffer = np.array(audio_file.get_array_of_samples())

    # Convert integer PCM to float32 in [-1, 1] and resample to the model rate
    audio_fp32 = np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
    audio_16k = soxr.resample(audio_fp32, sr, sample_rate)

    input_signal = torch.tensor(audio_16k).unsqueeze(0)
    length = torch.tensor(len(audio_16k)).unsqueeze(0)
    processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)

    logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]

    # Greedy CTC decoding: drop blank tokens, then detokenize
    blank_id = tokenizer.vocab_size()
    decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
    text = tokenizer.decode_ids(decoded_prediction)
    return text
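# Quick local sanity check (assumes an existing recording; the file name below
# is illustrative only):
#     print(transcribe("sample.wav"))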
with gr.Blocks() as demo:
    with gr.Row():
        # NOTE: this image path points outside the app; replace it with a file
        # that is actually bundled with the Space for the icon to render.
        gr.Markdown("""
        <div style="text-align: right; margin-bottom: 10px;">
            <img src="/mnt/data/Screenshot%202024-12-28%20102122.png" alt="Microphone Icon" style="width: 30px; height: 30px; cursor: pointer;">
        </div>
        """)
    # Gradio 4 replaced the `source` argument with `sources=[...]`
    audio_input = gr.Audio(label="Speak your preference", sources=["microphone"], type="filepath")
    preference = gr.Textbox(label="Current Preference", value="All")
    audio_output = gr.Audio(label="Assistant Response", autoplay=True)
    menu_output = gr.HTML(label="Menu Suggestions")

    audio_input.change(respond, inputs=[audio_input, preference], outputs=[audio_output, menu_output])
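# respond is a coroutine; Gradio awaits async event handlers directly, and
# demo.queue() below lets concurrent requests be processed through a queue.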
if __name__ == "__main__":
    demo.queue().launch()
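# On Hugging Face Spaces this file is started automatically; when running
# locally, execute it with Python and open the URL printed by launch().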