Thiago Hersan committed on
Commit ·
13d6b20
1
Parent(s): 96e20a1
add app files
Browse files
- .gitattributes +10 -35
- .github/workflows/deploy-hf.yml +25 -0
- .gitignore +3 -0
- README.md +6 -7
- app.py +37 -0
- audio/plain_01.wav +3 -0
- audio/plain_02.wav +3 -0
- audio/plain_03.wav +3 -0
- audio/plain_04.wav +3 -0
- audio/plain_04b.wav +3 -0
- audio/plain_05.wav +3 -0
- audio/secret_01.wav +3 -0
- audio/secret_02.wav +3 -0
- audio/secret_03.wav +3 -0
- audio/secret_04.wav +3 -0
- requirements.txt +4 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,10 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
audio/plain_03.wav filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
audio/plain_04.wav filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
audio/plain_05.wav filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
audio/secret_01.wav filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
audio/secret_02.wav filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
audio/plain_02.wav filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
audio/plain_04b.wav filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
audio/secret_03.wav filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
audio/secret_04.wav filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
audio/plain_01.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/deploy-hf.yml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy to Hugging Face spaces
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
build:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout Dev Repo
|
| 14 |
+
uses: actions/checkout@v3
|
| 15 |
+
with:
|
| 16 |
+
fetch-depth: 0
|
| 17 |
+
lfs: true
|
| 18 |
+
|
| 19 |
+
- name: Push to HF
|
| 20 |
+
env:
|
| 21 |
+
HFTOKEN: ${{ secrets.HFTOKEN }}
|
| 22 |
+
|
| 23 |
+
run: |
|
| 24 |
+
git remote add hf https://thiagohersan:$HFTOKEN@huggingface.co/spaces/visualizedata/5020-STT-Gradio
|
| 25 |
+
git push hf main
|
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.DS_S*
|
| 2 |
+
__pycache__/
|
| 3 |
+
gradio_cached_examples/
|
README.md
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
title: 5020
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: 5020 whisper-base-en Speech-to-Text
|
| 3 |
+
emoji: 🔊
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: yellow
|
| 6 |
sdk: gradio
|
| 7 |
+
python_version: 3.10.12
|
| 8 |
+
sdk_version: 5.0.2
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
---
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
from librosa import resample
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
|
| 7 |
+
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", chunk_length_s=30)
|
| 8 |
+
|
| 9 |
+
def transcribe(audio_in):
    """Transcribe a recorded or uploaded audio clip to lower-case text.

    Args:
        audio_in: ``(sample_rate, samples)`` tuple as produced by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when nothing was
            submitted.

    Returns:
        The stripped, lower-cased transcription. An empty string is returned
        when there is no audio, the clip is empty, or the signal is
        completely flat (nothing to normalize or transcribe).
    """
    # The UI can submit without any audio attached.
    if audio_in is None:
        return ""

    orig_sr, samples = audio_in

    # Work in floating point from the start: subtracting integer PCM scalars
    # (e.g. int16 max - min) can overflow and yield a bogus range.
    samples = np.asarray(samples, dtype=np.float32)

    # Downmix multi-channel input to mono; the model pipeline expects a 1-D
    # signal. NOTE(review): assumes the layout is (samples, channels) --
    # confirm against the Gradio Audio component documentation.
    if samples.ndim > 1:
        samples = samples.mean(axis=-1)

    if samples.size == 0:
        return ""

    # Vectorized min/max instead of the Python builtins, which iterate the
    # array element by element.
    min_s = samples.min()
    range_in = samples.max() - min_s

    # A flat signal has zero range: dividing by it would feed NaN/inf to the
    # model, so bail out early.
    if not range_in > 0:
        return ""

    # Min-max normalize into [-1.0, 1.0].
    samples_f = 2.0 * (samples - min_s) / range_in - 1.0

    # Resample to 16 kHz before handing the signal to the pipeline.
    resamples = resample(samples_f, orig_sr=orig_sr, target_sr=16000)
    prediction = pipe(resamples.copy(), batch_size=8)
    return prediction["text"].strip().lower()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
with gr.Blocks() as demo:
|
| 22 |
+
gr.Markdown("""
|
| 23 |
+
# 5020 Audio Transcription.
|
| 24 |
+
## API for [whisper-base.en](https://huggingface.co/openai/whisper-base.en) english model\
|
| 25 |
+
to help with Audio Analysis exercises.
|
| 26 |
+
""")
|
| 27 |
+
|
| 28 |
+
gr.Interface(
|
| 29 |
+
transcribe,
|
| 30 |
+
inputs=gr.Audio(type="numpy"),
|
| 31 |
+
outputs="text",
|
| 32 |
+
cache_examples=True,
|
| 33 |
+
examples=[["./audio/plain_01.wav"]]
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
if __name__ == "__main__":
|
| 37 |
+
demo.launch()
|
audio/plain_01.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4db5391e8429e21d7c19f05c6d551e01fe168186c91d1debb055c0305e8f84f
|
| 3 |
+
size 176440
|
audio/plain_02.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4222969e675d59440f5fec9dc7dd1fa83f4901a5b370f4fb116cdced83bfdc4f
|
| 3 |
+
size 453704
|
audio/plain_03.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58b18ff12c5ab02b3886669aebe0dfa2181006f65b8c7ae271bea60cdc0c9f19
|
| 3 |
+
size 308740
|
audio/plain_04.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6165df34e92ab966590fe5bd33b5afdae568ed2d26726bbaea142622805d0445
|
| 3 |
+
size 439442
|
audio/plain_04b.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72aad56c76fa0e1e3d0c16dd41bfbf62ecc406ed216711c16b9b2b852464b1be
|
| 3 |
+
size 5974316
|
audio/plain_05.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64aa9b09942a77e68820468e91af7ba00008ed520c03db90dad36292048feb31
|
| 3 |
+
size 441040
|
audio/secret_01.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b76b42a98c06aaeb1ac65ea7cf50063a76708d4e52466dc59684eab677857ae1
|
| 3 |
+
size 176440
|
audio/secret_02.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be2e88e04a8a28b8bcb848a7c5a08f50fb5835ad0b1e5436d03eac18e16ef656
|
| 3 |
+
size 453704
|
audio/secret_03.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0555b2ef8e1c29c24cd31eb622c146d028dba3b3bc0bab5467e3325d5811d769
|
| 3 |
+
size 308740
|
audio/secret_04.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd460839bcd94d5f51f2b641fa6b557561525ecdf04ab9c18e157d8bdf99b9f8
|
| 3 |
+
size 5712216
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic==2.8.2
|
| 2 |
+
librosa
|
| 3 |
+
torch
|
| 4 |
+
transformers
|