Spaces:
Runtime error
Runtime error
Commit ·
71f9a5c
1
Parent(s): e17707c
Upload folder using huggingface_hub
Browse files- .github/workflows/update_space.yml +28 -0
- 00-00_voice2text.py +61 -0
- 00-01_voice2text.py +42 -0
- README.md +3 -9
- requirements.txt +104 -0
.github/workflows/update_space.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Redeploys the Gradio app to Hugging Face Spaces on every push to main.
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        # Pass the secret through the environment instead of expanding it
        # into the command line, so it never becomes part of the script text.
        env:
          HF_TOKEN: ${{ secrets.hf_token }}
        run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
|
00-00_voice2text.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import whisper
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import time
|
| 4 |
+
from pyChatGPT import ChatGPT
|
| 5 |
+
import warnings
|
| 6 |
+
import openai
|
| 7 |
+
from gtts import gTTS
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# OpenAI credential, taken from the `openai_api_key` environment variable
# (None when the variable is not set).
OPENAI_API_KEY = os.environ.get('openai_api_key')

# Suppress every Python warning for the lifetime of the process.
warnings.filterwarnings(action="ignore")

# Load the "base" Whisper checkpoint once at import time so that each
# transcription request reuses the same model object.
model = whisper.load_model(name="base")
|
| 19 |
+
|
| 20 |
+
def transcribe(audio):
    """Transcribe a recording with Whisper and ask OpenAI to reply to it.

    Args:
        audio: Path of the recorded audio file handed over by the Gradio
            audio input, or a falsy value (None / "") when there is no
            recording.

    Returns:
        A two-item list ``[transcript, reply]``. ``transcript`` is the text
        decoded by Whisper and ``reply`` is the completion text returned by
        OpenAI for that transcript. Both items are empty strings when no
        audio was supplied; ``reply`` is empty when nothing was transcribed.
    """
    # The callback can be invoked without a recording (presumably when a
    # live input is cleared — confirm against Gradio); there is nothing to
    # load in that case, so return empty outputs instead of crashing.
    if not audio:
        return ["", ""]

    # load audio and pad/trim it to fit 30 seconds
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # decode the audio. The former explicit model.detect_language() call was
    # dropped: its result was never used.
    # NOTE(review): whisper.decode is expected to detect the language itself
    # when the options carry none — confirm for the pinned whisper revision.
    options = whisper.DecodingOptions(fp16=False)
    result_text = whisper.decode(model, mel, options).text

    # Do not spend an API call on an empty prompt.
    if not result_text.strip():
        return [result_text, ""]

    # Call OpenAI API for text for reply back.
    openai.api_key = OPENAI_API_KEY
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt=result_text,
        max_tokens=500,
        temperature=0,
    )
    out_result = completion["choices"][0]["text"]
    return [result_text, out_result]
|
| 47 |
+
|
| 48 |
+
# Output widgets, listed in the order transcribe() returns its two values.
output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="ChatGPT Output")

# Build the UI around transcribe() and start serving it right away.
gr.Interface(
    title='Voice to Text using OpenAI (KF)',
    fn=transcribe,
    # gr.inputs.* is a deprecated namespace; gr.Audio is the current
    # component and accepts the same arguments in gradio 3.x.
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        output_1,
        output_2,
    ],
    live=True,
    # Boolean allow_flagging values are deprecated; "never" is the string
    # spelling of the previous False.
    allow_flagging="never",
).launch(share=True)
|
00-01_voice2text.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import whisper
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import time
|
| 4 |
+
from pyChatGPT import ChatGPT
|
| 5 |
+
import warnings
|
| 6 |
+
from gtts import gTTS
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Suppress every Python warning for the lifetime of the process.
warnings.filterwarnings(action="ignore")

# Load the "base" Whisper checkpoint once at import time so that each
# transcription request reuses the same model object.
model = whisper.load_model(name="base")
|
| 12 |
+
|
| 13 |
+
def transcribe(audio):
    """Transcribe a recording with Whisper.

    Args:
        audio: Path of the recorded audio file handed over by the Gradio
            audio input, or a falsy value (None / "") when there is no
            recording.

    Returns:
        The text decoded by Whisper, or an empty string when no audio was
        supplied.
    """
    # The callback can be invoked without a recording (presumably when a
    # live input is cleared — confirm against Gradio); there is nothing to
    # load in that case, so return an empty transcript instead of crashing.
    if not audio:
        return ""

    # load audio and pad/trim it to fit 30 seconds
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # decode the audio and hand back only the recognised text
    options = whisper.DecodingOptions(fp16=False)
    return whisper.decode(model, mel, options).text
|
| 29 |
+
|
| 30 |
+
# Single output widget that shows the text returned by transcribe().
output_1 = gr.Textbox(label="Speech to Text")

# Build the UI around transcribe() and start serving it right away.
gr.Interface(
    title='Voice to Text (KF)',
    fn=transcribe,
    # gr.inputs.* is a deprecated namespace; gr.Audio is the current
    # component and accepts the same arguments in gradio 3.x.
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        output_1,
    ],
    live=True,
    # Boolean allow_flagging values are deprecated; "never" is the string
    # spelling of the previous False.
    allow_flagging="never",
).launch(share=True)
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: red
|
| 5 |
-
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 3.
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: v2t
|
| 3 |
+
app_file: 00-01_voice2text.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
+
sdk_version: 3.42.0
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiofiles==23.2.1
|
| 2 |
+
aiohttp==3.8.5
|
| 3 |
+
aiosignal==1.3.1
|
| 4 |
+
altair==5.1.1
|
| 5 |
+
anyio==3.7.1
|
| 6 |
+
async-timeout==4.0.3
|
| 7 |
+
attrs==23.1.0
|
| 8 |
+
beautifulsoup4==4.12.2
|
| 9 |
+
certifi==2023.7.22
|
| 10 |
+
charset-normalizer==3.2.0
|
| 11 |
+
click==8.1.7
|
| 12 |
+
contourpy==1.1.0
|
| 13 |
+
cycler==0.11.0
|
| 14 |
+
dacite==1.8.1
|
| 15 |
+
exceptiongroup==1.1.3
|
| 16 |
+
fastapi==0.103.1
|
| 17 |
+
ffmpy==0.3.1
|
| 18 |
+
filelock==3.12.3
|
| 19 |
+
fonttools==4.42.1
|
| 20 |
+
frozenlist==1.4.0
|
| 21 |
+
fsspec==2023.9.0
|
| 22 |
+
gradio==3.42.0
|
| 23 |
+
gradio_client==0.5.0
|
| 24 |
+
gTTS==2.3.2
|
| 25 |
+
h11==0.14.0
|
| 26 |
+
htmlmin==0.1.12
|
| 27 |
+
httpcore==0.17.3
|
| 28 |
+
httpx==0.24.1
|
| 29 |
+
huggingface-hub==0.16.4
|
| 30 |
+
idna==3.4
|
| 31 |
+
ImageHash==4.3.1
|
| 32 |
+
importlib-resources==6.0.1
|
| 33 |
+
Jinja2==3.1.2
|
| 34 |
+
joblib==1.3.2
|
| 35 |
+
jsonschema==4.19.0
|
| 36 |
+
jsonschema-specifications==2023.7.1
|
| 37 |
+
kiwisolver==1.4.5
|
| 38 |
+
llvmlite==0.40.1
|
| 39 |
+
markdownify==0.11.6
|
| 40 |
+
MarkupSafe==2.1.3
|
| 41 |
+
matplotlib==3.7.2
|
| 42 |
+
more-itertools==10.1.0
|
| 43 |
+
mpmath==1.3.0
|
| 44 |
+
multidict==6.0.4
|
| 45 |
+
multimethod==1.9.1
|
| 46 |
+
networkx==3.1
|
| 47 |
+
numba==0.57.1
|
| 48 |
+
numpy==1.23.5
|
| 49 |
+
openai==0.28.0
|
| 50 |
+
openai-whisper @ git+https://github.com/openai/whisper.git@e8622f9afc4eba139bf796c210f5c01081000472
|
| 51 |
+
orjson==3.9.5
|
| 52 |
+
outcome==1.2.0
|
| 53 |
+
packaging==23.1
|
| 54 |
+
pandas==2.0.3
|
| 55 |
+
pandas-profiling==3.6.6
|
| 56 |
+
patsy==0.5.3
|
| 57 |
+
phik==0.12.3
|
| 58 |
+
Pillow==10.0.0
|
| 59 |
+
pyChatGPT==0.4.3.3
|
| 60 |
+
pydantic==1.10.12
|
| 61 |
+
pydub==0.25.1
|
| 62 |
+
pyparsing==3.0.9
|
| 63 |
+
PySocks==1.7.1
|
| 64 |
+
python-dateutil==2.8.2
|
| 65 |
+
python-multipart==0.0.6
|
| 66 |
+
pytz==2023.3
|
| 67 |
+
PyWavelets==1.4.1
|
| 68 |
+
PyYAML==6.0.1
|
| 69 |
+
referencing==0.30.2
|
| 70 |
+
regex==2023.8.8
|
| 71 |
+
requests==2.31.0
|
| 72 |
+
rpds-py==0.10.2
|
| 73 |
+
scipy==1.11.2
|
| 74 |
+
seaborn==0.12.2
|
| 75 |
+
selenium==4.12.0
|
| 76 |
+
semantic-version==2.10.0
|
| 77 |
+
six==1.16.0
|
| 78 |
+
sniffio==1.3.0
|
| 79 |
+
sortedcontainers==2.4.0
|
| 80 |
+
soupsieve==2.5
|
| 81 |
+
starlette==0.27.0
|
| 82 |
+
statsmodels==0.14.0
|
| 83 |
+
sympy==1.12
|
| 84 |
+
tangled-up-in-unicode==0.2.0
|
| 85 |
+
tiktoken==0.3.3
|
| 86 |
+
toolz==0.12.0
|
| 87 |
+
torch==2.0.1
|
| 88 |
+
tqdm==4.66.1
|
| 89 |
+
trio==0.22.2
|
| 90 |
+
trio-websocket==0.10.4
|
| 91 |
+
typeguard==2.13.3
|
| 92 |
+
typing_extensions==4.7.1
|
| 93 |
+
tzdata==2023.3
|
| 94 |
+
undetected-chromedriver==3.5.3
|
| 95 |
+
urllib3==2.0.4
|
| 96 |
+
uvicorn==0.23.2
|
| 97 |
+
visions==0.7.5
|
| 98 |
+
websockets==11.0.3
|
| 99 |
+
whisper==1.1.10
|
| 100 |
+
wordcloud==1.9.2
|
| 101 |
+
wsproto==1.2.0
|
| 102 |
+
yarl==1.9.2
|
| 103 |
+
ydata-profiling==4.5.1
|
| 104 |
+
zipp==3.16.2
|