File size: 922 Bytes
eb7784f
 
4128dbb
7e08de2
eb7784f
7e08de2
b8197dd
eb7784f
 
 
7570ba2
31d1e52
7570ba2
7e08de2
43c3828
eb7784f
 
b9a4cf0
7570ba2
9f673f0
60007a3
1d81e34
f8912d8
 
 
eb7784f
 
43c3828
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
import base64
import torch
from transformers import pipeline

# Load the Whisper speech-recognition model once at import time so every
# request reuses the same pipeline. chunk_length_s=30 makes the pipeline split
# recordings longer than Whisper's 30-second input window into chunks, so long
# clips are transcribed in full instead of being cut off or rejected.
whisper = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
)

# Convert recorded speech to text.
def transcribe_audio(audio):
    """Transcribe an audio clip to text and Base64-encode the result.

    Args:
        audio: Value handed over by the Gradio audio input; it is passed
            unchanged to the module-level ``whisper`` pipeline. ``None``
            means no recording was submitted.

    Returns:
        A ``(text, encoded)`` tuple: the transcription and the Base64
        encoding of its UTF-8 bytes. When ``audio`` is ``None`` the tuple
        holds the error message "에러: 오디오 없음" ("Error: no audio") and
        an empty string.
    """
    if audio is None:
        # The previous literal was mojibake (UTF-8 Korean decoded with the
        # wrong code page); this is the restored user-facing message.
        return "에러: 오디오 없음", ""

    # Read the transcription once and reuse it for both outputs.
    text = whisper(audio)["text"]
    # Note: Base64 is a reversible encoding, not encryption.
    return text, base64.b64encode(text.encode()).decode()

# Gradio interface: microphone recording in, transcription and its Base64
# form out. The UI strings were mojibake (UTF-8 Korean decoded with the wrong
# code page) and are restored here to the intended Korean text; the English
# meaning is given in the trailing comments.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(label='오디오', sources="microphone", type='filepath'),  # "Audio"
    outputs=[
        gr.Textbox(label='결과'),  # "Result"
        gr.Textbox(label='암호화된 결과'),  # "Encrypted result" (Base64 text)
    ],
    # "Lee Woojin's Speech to Text (※ pressing Run right after recording
    # raises an error)"
    title='이우진의 Speech to Text (※ 녹음 후 바로 실행 누르면 에러남)',
    # "Contribution: AI 60%, me 40%"
    description='기여도: AI 60% 나 40%',
    submit_btn='실행',  # "Run"
    clear_btn='지우기',  # "Clear"
)

# Start the app.
demo.launch()