"""Gradio demo: YOLO-based sign detection on images plus speech-to-text."""

import gradio as gr
from ultralytics import YOLO
import speech_recognition as sr

# Weights file handed to YOLO; a relative path, so it is resolved against
# the current working directory.
WEIGHTS_PATH = 'best.pt'

# Loaded once at import time and shared by every inference request.
model = YOLO(WEIGHTS_PATH)
def proses_image(image):
    """Run the YOLO detector on an image and return the annotated frame.

    Args:
        image: The image to analyse. The Gradio interface in this file uses
            a default ``gr.Image`` input, which is expected to deliver an
            RGB ``numpy`` array of shape (H, W, 3), or ``None`` once the
            input has been cleared.

    Returns:
        The frame with the detections drawn on it, in RGB channel order so
        it can be shown directly by ``gr.Image``, or ``None`` when no image
        was supplied.
    """
    # The live interface also fires when the image is cleared; skip
    # inference instead of handing the model an empty source.
    if image is None:
        return None

    # Ultralytics interprets raw numpy input as BGR (OpenCV convention),
    # while Gradio supplies RGB. Swap the channels for 3-channel arrays;
    # anything else (path, PIL image, ...) is passed through untouched.
    # ``.copy()`` makes the reversed view contiguous for OpenCV.
    if getattr(image, "ndim", None) == 3 and image.shape[-1] == 3:
        image = image[..., ::-1].copy()

    results = model(image)

    # ``plot()`` returns a BGR array; flip it back to RGB for display.
    annotated_bgr = results[0].plot()
    return annotated_bgr[..., ::-1].copy()
def process_audio(audio):
    """Transcribe a recorded audio file with Google's speech recognition.

    Args:
        audio: Path to the audio file to transcribe (the Gradio input in
            this file is configured with ``type="filepath"``). ``None`` or
            an empty path means nothing has been recorded yet.

    Returns:
        The recognized text; an empty string when no audio was supplied;
        or a user-facing message when the speech could not be understood
        or the recognition service request failed.
    """
    # The live/streaming interface can invoke this callback before any
    # recording exists; there is nothing to open in that case.
    if not audio:
        return ""

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    # Only the recognition call itself is expected to raise these errors,
    # so the try body is limited to it.
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Audio tidak dapat dikenali."
    except sr.RequestError as e:
        return f"Error dengan layanan pengenalan suara: {e}"
def update_visibility(input):
    """Choose which of the two mode rows is visible.

    Args:
        input: The selected mode label, "Gambar" (image) or "Audio".

    Returns:
        A pair of ``gr.Row`` updates ``(image_row, audio_row)`` with exactly
        one row visible, or ``None`` when the label is neither known mode.
    """
    show_image = input == "Gambar"
    show_audio = not show_image and input == "Audio"

    # Unknown labels produce no update at all.
    if not (show_image or show_audio):
        return None

    return gr.Row(visible=show_image), gr.Row(visible=show_audio)
# Page layout: a title, a mode selector, and one row per mode. Only the row
# matching the selected mode is visible at any time.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Multimodal American Sign Language")

    with gr.Row():
        # Named ``mode_selector`` rather than ``input`` so the builtin
        # ``input`` function is not shadowed at module level.
        mode_selector = gr.Radio(
            ["Gambar", "Audio"],
            value="Gambar",
            label="Pilih mode:",
        )

    # Image mode (shown initially, matching the selector's default value).
    with gr.Row(visible=True) as gambar:
        gr.Interface(
            fn=proses_image,
            inputs=gr.Image(),
            outputs=gr.Image(),
            live=True,
        )

    # Audio mode (hidden until "Audio" is selected).
    with gr.Row(visible=False) as audio:
        gr.Interface(
            fn=process_audio,
            inputs=gr.Audio(sources="microphone", type="filepath", streaming=True),
            outputs=gr.Textbox(),
            live=True,
        )

    # Swap the visible row whenever the selected mode changes.
    mode_selector.change(
        update_visibility,
        inputs=[mode_selector],
        outputs=[gambar, audio],
    )

demo.launch()