# Standard library
import io
import os

# Third-party
import soundfile as sf
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, GradioUI, HfApiModel

# Pull HF_TOKEN (and any other settings) from a local .env file into the
# process environment before anything reads os.getenv().
load_dotenv()
def convert_data_to_audio_filelike(your_input_tuple):
    """Serialize a ``(sample_rate, np.ndarray)`` pair into an in-memory WAV file.

    Args:
        your_input_tuple: Pair of sample rate (int, Hz) and audio samples
            (numpy array), as produced e.g. by a Gradio audio component.

    Returns:
        io.BytesIO: WAV-encoded audio, rewound to position 0 so it can be
        read immediately.
    """
    rate, samples = your_input_tuple
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, rate, format="WAV")
    # Rewind so callers can .read() the full payload right away.
    wav_buffer.seek(0)
    return wav_buffer
def speech2text_func(data, model: str = "openai/whisper-small.en") -> str:
    """Transcribe audio to text with the Hugging Face inference API.

    Args:
        data: Raw audio bytes, or a ``(sample_rate, np.ndarray)`` tuple as
            delivered by a Gradio audio component.
        model: ASR model id on the Hugging Face Hub.

    Returns:
        str: The recognized text.
    """
    # Gradio hands audio over as (sample_rate, samples); serialize that to
    # WAV bytes before sending it to the API.
    if isinstance(data, tuple):
        data = convert_data_to_audio_filelike(data).read()
    asr_client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),  # may be None if .env is missing — auth then fails server-side
    )
    return asr_client.automatic_speech_recognition(data, model=model).text
def get_tools():
    """Assemble the tool configuration for the agent.

    Returns:
        tuple[list, bool]: The custom tools to register (currently none)
        and a flag telling CodeAgent to also add its built-in base tools.
    """
    use_base_tools = True
    custom_tools: list = []
    return custom_tools, use_base_tools
if __name__ == "__main__":
    # Wire up the agent: tools, LLM backend, then the Gradio front-end.
    custom_tools, use_base_tools = get_tools()
    llm = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", provider=None)
    # NOTE(review): "web_search" is passed as an authorized *import* — confirm
    # it is an importable module name and not a tool identifier.
    agent = CodeAgent(
        tools=custom_tools,
        model=llm,
        add_base_tools=use_base_tools,
        additional_authorized_imports=["web_search"],
    )
    # NOTE(review): speech2text_func is forwarded to GradioUI.launch — verify
    # the installed smolagents version accepts this keyword.
    GradioUI(agent).launch(speech2text_func=speech2text_func)