import gradio as gr
from pydub import AudioSegment
import librosa
import torch
import soundfile as sf
import numpy as np
import os

# import your existing functions
from model import textonly, speechonly


def text_interface(text):
    """Run the text-only model pipeline.

    Args:
        text: User-entered prompt string from the textbox.

    Returns:
        The model's text response (whatever ``textonly`` returns).
    """
    result = textonly(text)
    return result


def speech_interface(audio_file):
    """Run the speech pipeline: preprocess audio, get LLM text + audio reply.

    Args:
        audio_file: Gradio ``Audio(type="numpy")`` value — a
            ``(sample_rate, audio_data)`` tuple, or ``None`` when the user
            submitted nothing.

    Returns:
        A 2-tuple ``(llm_response, output_audio_path)`` matching the two
        Gradio output components wired to this handler.
    """
    if audio_file is None:
        return "Please provide an audio file", None

    # audio_file is a tuple of (sample_rate, audio_data) from Gradio
    sr, audio_data = audio_file

    # Convert stereo (or multi-channel) to mono by averaging channels
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    # NOTE(review): Gradio typically delivers integer (int16) PCM for
    # type="numpy"; librosa.resample requires float input, so normalize
    # integer samples to float32 in [-1, 1] first — confirm against the
    # installed Gradio version.
    if np.issubdtype(audio_data.dtype, np.integer):
        audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max

    # Resample to the 16 kHz rate the speech model expects
    if sr != 16000:
        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)

    # speechonly writes its synthesized reply to output_wav_path and
    # returns the LLM's text response.
    output_wav_path = "output.wav"
    llm_response = speechonly(audio_data, output_wav_path=output_wav_path)

    # BUGFIX: this handler feeds two Gradio outputs (text + audio), so it
    # must return two values; previously the success path returned only
    # the text response.
    return llm_response, output_wav_path


# Create Gradio interface with tabs
with gr.Blocks(title="Hamid AI Speech API") as app:
    gr.Markdown("# Hamid AI Speech Interface")
    gr.Markdown("Choose between text-only or speech-based interaction")

    with gr.Tab("Text Only"):
        text_input = gr.Textbox(label="Enter your text", placeholder="Type something...")
        text_output = gr.Textbox(label="Response", interactive=False)
        text_button = gr.Button("Process Text")
        text_button.click(fn=text_interface, inputs=text_input, outputs=text_output)

    with gr.Tab("Speech Only"):
        audio_input = gr.Audio(label="Upload or record audio", type="numpy")
        speech_output = gr.Textbox(label="LLM Response", interactive=False)
        audio_output = gr.Audio(label="Output Audio", type="filepath")
        speech_button = gr.Button("Process Speech")
        speech_button.click(
            fn=speech_interface,
            inputs=audio_input,
            outputs=[speech_output, audio_output],
        )

if __name__ == "__main__":
    app.launch(share=False)