Spaces:
Sleeping
Sleeping
have it working it as a bloack
Browse files
=2.0
DELETED
|
@@ -1,191 +0,0 @@
|
|
| 1 |
-
Requirement already satisfied: gradio in /usr/local/lib/python3.10/site-packages (5.41.0)
|
| 2 |
-
Collecting transformers
|
| 3 |
-
Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)
|
| 4 |
-
Collecting torch
|
| 5 |
-
Downloading torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (30 kB)
|
| 6 |
-
Collecting faster-whisper
|
| 7 |
-
Downloading faster_whisper-1.2.0-py3-none-any.whl.metadata (16 kB)
|
| 8 |
-
Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/site-packages (0.34.3)
|
| 9 |
-
Requirement already satisfied: numpy in /usr/local/lib/python3.10/site-packages (2.2.6)
|
| 10 |
-
Collecting scipy
|
| 11 |
-
Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
|
| 12 |
-
Collecting soundfile
|
| 13 |
-
Downloading soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl.metadata (16 kB)
|
| 14 |
-
Collecting ffmpeg-python
|
| 15 |
-
Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
|
| 16 |
-
Requirement already satisfied: aiofiles<25.0,>=22.0 in /usr/local/lib/python3.10/site-packages (from gradio) (24.1.0)
|
| 17 |
-
Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/site-packages (from gradio) (4.10.0)
|
| 18 |
-
Requirement already satisfied: brotli>=1.1.0 in /usr/local/lib/python3.10/site-packages (from gradio) (1.1.0)
|
| 19 |
-
Requirement already satisfied: fastapi<1.0,>=0.115.2 in /usr/local/lib/python3.10/site-packages (from gradio) (0.116.1)
|
| 20 |
-
Requirement already satisfied: ffmpy in /usr/local/lib/python3.10/site-packages (from gradio) (0.6.1)
|
| 21 |
-
Requirement already satisfied: gradio-client==1.11.0 in /usr/local/lib/python3.10/site-packages (from gradio) (1.11.0)
|
| 22 |
-
Requirement already satisfied: groovy~=0.1 in /usr/local/lib/python3.10/site-packages (from gradio) (0.1.2)
|
| 23 |
-
Requirement already satisfied: httpx<1.0,>=0.24.1 in /usr/local/lib/python3.10/site-packages (from gradio) (0.28.1)
|
| 24 |
-
Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/site-packages (from gradio) (3.1.6)
|
| 25 |
-
Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.10/site-packages (from gradio) (3.0.2)
|
| 26 |
-
Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.10/site-packages (from gradio) (3.11.1)
|
| 27 |
-
Requirement already satisfied: packaging in /usr/local/lib/python3.10/site-packages (from gradio) (25.0)
|
| 28 |
-
Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/site-packages (from gradio) (2.3.1)
|
| 29 |
-
Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.10/site-packages (from gradio) (11.3.0)
|
| 30 |
-
Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.10/site-packages (from gradio) (2.11.7)
|
| 31 |
-
Requirement already satisfied: pydub in /usr/local/lib/python3.10/site-packages (from gradio) (0.25.1)
|
| 32 |
-
Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.10/site-packages (from gradio) (0.0.20)
|
| 33 |
-
Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/site-packages (from gradio) (6.0.2)
|
| 34 |
-
Requirement already satisfied: ruff>=0.9.3 in /usr/local/lib/python3.10/site-packages (from gradio) (0.12.7)
|
| 35 |
-
Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in /usr/local/lib/python3.10/site-packages (from gradio) (0.1.6)
|
| 36 |
-
Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.10/site-packages (from gradio) (2.10.0)
|
| 37 |
-
Requirement already satisfied: starlette<1.0,>=0.40.0 in /usr/local/lib/python3.10/site-packages (from gradio) (0.47.2)
|
| 38 |
-
Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in /usr/local/lib/python3.10/site-packages (from gradio) (0.13.3)
|
| 39 |
-
Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/site-packages (from gradio) (0.16.0)
|
| 40 |
-
Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/site-packages (from gradio) (4.14.1)
|
| 41 |
-
Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.10/site-packages (from gradio) (0.35.0)
|
| 42 |
-
Requirement already satisfied: fsspec in /usr/local/lib/python3.10/site-packages (from gradio-client==1.11.0->gradio) (2025.3.0)
|
| 43 |
-
Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.10/site-packages (from gradio-client==1.11.0->gradio) (15.0.1)
|
| 44 |
-
Requirement already satisfied: filelock in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (3.18.0)
|
| 45 |
-
Requirement already satisfied: requests in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (2.32.4)
|
| 46 |
-
Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (4.67.1)
|
| 47 |
-
Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (1.1.7)
|
| 48 |
-
Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (1.3.0)
|
| 49 |
-
Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (3.10)
|
| 50 |
-
Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)
|
| 51 |
-
Requirement already satisfied: certifi in /usr/local/lib/python3.10/site-packages (from httpx<1.0,>=0.24.1->gradio) (2025.8.3)
|
| 52 |
-
Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/site-packages (from httpx<1.0,>=0.24.1->gradio) (1.0.9)
|
| 53 |
-
Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.10/site-packages (from httpcore==1.*->httpx<1.0,>=0.24.1->gradio) (0.16.0)
|
| 54 |
-
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)
|
| 55 |
-
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2025.2)
|
| 56 |
-
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2025.2)
|
| 57 |
-
Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/site-packages (from pydantic<2.12,>=2.0->gradio) (0.7.0)
|
| 58 |
-
Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.10/site-packages (from pydantic<2.12,>=2.0->gradio) (2.33.2)
|
| 59 |
-
Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.10/site-packages (from pydantic<2.12,>=2.0->gradio) (0.4.1)
|
| 60 |
-
Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (8.0.4)
|
| 61 |
-
Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (1.5.4)
|
| 62 |
-
Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (14.1.0)
|
| 63 |
-
Collecting regex!=2019.12.17 (from transformers)
|
| 64 |
-
Downloading regex-2025.7.34-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
|
| 65 |
-
Collecting tokenizers<0.22,>=0.21 (from transformers)
|
| 66 |
-
Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
|
| 67 |
-
Collecting safetensors>=0.4.3 (from transformers)
|
| 68 |
-
Downloading safetensors-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
|
| 69 |
-
Collecting sympy>=1.13.3 (from torch)
|
| 70 |
-
Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
|
| 71 |
-
Collecting networkx (from torch)
|
| 72 |
-
Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
|
| 73 |
-
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
|
| 74 |
-
Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
|
| 75 |
-
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
|
| 76 |
-
Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
|
| 77 |
-
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
|
| 78 |
-
Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
|
| 79 |
-
Collecting nvidia-cudnn-cu12==9.10.2.21 (from torch)
|
| 80 |
-
Downloading nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
|
| 81 |
-
Collecting nvidia-cublas-cu12==12.8.4.1 (from torch)
|
| 82 |
-
Downloading nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
|
| 83 |
-
Collecting nvidia-cufft-cu12==11.3.3.83 (from torch)
|
| 84 |
-
Downloading nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
|
| 85 |
-
Collecting nvidia-curand-cu12==10.3.9.90 (from torch)
|
| 86 |
-
Downloading nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
|
| 87 |
-
Collecting nvidia-cusolver-cu12==11.7.3.90 (from torch)
|
| 88 |
-
Downloading nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
|
| 89 |
-
Collecting nvidia-cusparse-cu12==12.5.8.93 (from torch)
|
| 90 |
-
Downloading nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)
|
| 91 |
-
Collecting nvidia-cusparselt-cu12==0.7.1 (from torch)
|
| 92 |
-
Downloading nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl.metadata (7.0 kB)
|
| 93 |
-
Collecting nvidia-nccl-cu12==2.27.3 (from torch)
|
| 94 |
-
Downloading nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.0 kB)
|
| 95 |
-
Collecting nvidia-nvtx-cu12==12.8.90 (from torch)
|
| 96 |
-
Downloading nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)
|
| 97 |
-
Collecting nvidia-nvjitlink-cu12==12.8.93 (from torch)
|
| 98 |
-
Downloading nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
|
| 99 |
-
Collecting nvidia-cufile-cu12==1.13.1.3 (from torch)
|
| 100 |
-
Downloading nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
|
| 101 |
-
Collecting triton==3.4.0 (from torch)
|
| 102 |
-
Downloading triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.7 kB)
|
| 103 |
-
Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.10/site-packages (from triton==3.4.0->torch) (65.5.1)
|
| 104 |
-
Collecting ctranslate2<5,>=4.0 (from faster-whisper)
|
| 105 |
-
Downloading ctranslate2-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
|
| 106 |
-
Collecting onnxruntime<2,>=1.14 (from faster-whisper)
|
| 107 |
-
Downloading onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
|
| 108 |
-
Collecting av>=11 (from faster-whisper)
|
| 109 |
-
Downloading av-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.6 kB)
|
| 110 |
-
Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)
|
| 111 |
-
Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
|
| 112 |
-
Collecting flatbuffers (from onnxruntime<2,>=1.14->faster-whisper)
|
| 113 |
-
Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
|
| 114 |
-
Requirement already satisfied: protobuf in /usr/local/lib/python3.10/site-packages (from onnxruntime<2,>=1.14->faster-whisper) (3.20.3)
|
| 115 |
-
Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/site-packages (from soundfile) (1.17.1)
|
| 116 |
-
Collecting future (from ffmpeg-python)
|
| 117 |
-
Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
|
| 118 |
-
Requirement already satisfied: pycparser in /usr/local/lib/python3.10/site-packages (from cffi>=1.0->soundfile) (2.22)
|
| 119 |
-
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)
|
| 120 |
-
Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)
|
| 121 |
-
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.19.2)
|
| 122 |
-
Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)
|
| 123 |
-
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
|
| 124 |
-
Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
|
| 125 |
-
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)
|
| 126 |
-
Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
|
| 127 |
-
Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub) (3.4.2)
|
| 128 |
-
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub) (2.5.0)
|
| 129 |
-
Downloading transformers-4.55.0-py3-none-any.whl (11.3 MB)
|
| 130 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.3/11.3 MB 157.2 MB/s 0:00:00
|
| 131 |
-
Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
|
| 132 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 168.3 MB/s 0:00:00
|
| 133 |
-
Downloading torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl (888.0 MB)
|
| 134 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 888.0/888.0 MB 105.1 MB/s 0:00:06
|
| 135 |
-
Downloading nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl (594.3 MB)
|
| 136 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 594.3/594.3 MB 258.5 MB/s 0:00:01
|
| 137 |
-
Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (10.2 MB)
|
| 138 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.2/10.2 MB 226.6 MB/s 0:00:00
|
| 139 |
-
Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (88.0 MB)
|
| 140 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 88.0/88.0 MB 378.9 MB/s 0:00:00
|
| 141 |
-
Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (954 kB)
|
| 142 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 954.8/954.8 kB 69.2 MB/s 0:00:00
|
| 143 |
-
Downloading nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl (706.8 MB)
|
| 144 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 706.8/706.8 MB 286.2 MB/s 0:00:01
|
| 145 |
-
Downloading nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (193.1 MB)
|
| 146 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 193.1/193.1 MB 163.9 MB/s 0:00:01
|
| 147 |
-
Downloading nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.2 MB)
|
| 148 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 89.5 MB/s 0:00:00
|
| 149 |
-
Downloading nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl (63.6 MB)
|
| 150 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 63.6/63.6 MB 303.5 MB/s 0:00:00
|
| 151 |
-
Downloading nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl (267.5 MB)
|
| 152 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 267.5/267.5 MB 265.9 MB/s 0:00:00
|
| 153 |
-
Downloading nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (288.2 MB)
|
| 154 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 288.2/288.2 MB 383.7 MB/s 0:00:00
|
| 155 |
-
Downloading nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl (287.2 MB)
|
| 156 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 287.2/287.2 MB 389.0 MB/s 0:00:00
|
| 157 |
-
Downloading nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (322.4 MB)
|
| 158 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 322.4/322.4 MB 298.2 MB/s 0:00:01
|
| 159 |
-
Downloading nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.3 MB)
|
| 160 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.3/39.3 MB 378.8 MB/s 0:00:00
|
| 161 |
-
Downloading nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89 kB)
|
| 162 |
-
Downloading triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (155.4 MB)
|
| 163 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 155.4/155.4 MB 163.6 MB/s 0:00:00
|
| 164 |
-
Downloading faster_whisper-1.2.0-py3-none-any.whl (1.1 MB)
|
| 165 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 76.0 MB/s 0:00:00
|
| 166 |
-
Downloading ctranslate2-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)
|
| 167 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 38.4/38.4 MB 161.9 MB/s 0:00:00
|
| 168 |
-
Downloading onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.5 MB)
|
| 169 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.5/16.5 MB 163.1 MB/s 0:00:00
|
| 170 |
-
Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7 MB)
|
| 171 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 37.7/37.7 MB 395.4 MB/s 0:00:00
|
| 172 |
-
Downloading soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl (1.3 MB)
|
| 173 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 33.6 MB/s 0:00:00
|
| 174 |
-
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
|
| 175 |
-
Downloading av-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.2 MB)
|
| 176 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.2/39.2 MB 163.0 MB/s 0:00:00
|
| 177 |
-
Downloading regex-2025.7.34-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (789 kB)
|
| 178 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 789.8/789.8 kB 50.5 MB/s 0:00:00
|
| 179 |
-
Downloading safetensors-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
|
| 180 |
-
Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
|
| 181 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 209.0 MB/s 0:00:00
|
| 182 |
-
Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
|
| 183 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 kB 18.0 MB/s 0:00:00
|
| 184 |
-
Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
|
| 185 |
-
Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
|
| 186 |
-
Downloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)
|
| 187 |
-
Downloading future-1.0.0-py3-none-any.whl (491 kB)
|
| 188 |
-
Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
|
| 189 |
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 129.1 MB/s 0:00:00
|
| 190 |
-
Installing collected packages: nvidia-cusparselt-cu12, mpmath, flatbuffers, triton, sympy, scipy, safetensors, regex, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufile-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, networkx, humanfriendly, future, ctranslate2, av, soundfile, nvidia-cusparse-cu12, nvidia-cufft-cu12, nvidia-cudnn-cu12, ffmpeg-python, coloredlogs, tokenizers, onnxruntime, nvidia-cusolver-cu12, transformers, torch, faster-whisper
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
=3.30
DELETED
|
File without changes
|
=4.30
DELETED
|
File without changes
|
app.py
CHANGED
|
@@ -1,239 +1,45 @@
|
|
| 1 |
-
# app.py
|
| 2 |
-
# Minimal Gradio voice-assistant demo for Hugging Face Spaces.
|
| 3 |
-
# - STT: faster-whisper (WhisperModel)
|
| 4 |
-
# - Sentiment: transformers pipeline
|
| 5 |
-
# - LLM: local HF model (small) or HF Inference API (recommended for huge models)
|
| 6 |
-
# - TTS: bosonai/higgs-audio-v2 (when available)
|
| 7 |
-
#
|
| 8 |
-
# NOTES:
|
| 9 |
-
# - Replace model names with versions you prefer.
|
| 10 |
-
# - If you want streaming / low-latency WebRTC, see the "Advanced: WebRTC / streaming" comments below.
|
| 11 |
-
|
| 12 |
-
import os
|
| 13 |
-
import tempfile
|
| 14 |
-
import uuid
|
| 15 |
-
from pathlib import Path
|
| 16 |
-
from faster_whisper import WhisperModel
|
| 17 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
| 18 |
-
import torch
|
| 19 |
import gradio as gr
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
# --------------------
|
| 23 |
-
# CONFIG / ENV VARS
|
| 24 |
-
# --------------------
|
| 25 |
-
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN") # optional for hosted inference
|
| 26 |
-
USE_HOSTED_LLM = bool(os.environ.get("USE_HOSTED_LLM", "")) # set to "1" to use hosted HF inference instead of a local model
|
| 27 |
-
LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME", "gpt2") # default small model for Spaces demo
|
| 28 |
-
TTS_MODEL = os.environ.get("TTS_MODEL", "bosonai/higgs-audio-v2-generation-3B-base") # example; may need HF token
|
| 29 |
-
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 30 |
-
|
| 31 |
-
# --------------------
|
| 32 |
-
# Initialize models
|
| 33 |
-
# --------------------
|
| 34 |
-
|
| 35 |
-
# Faster Whisper STT
|
| 36 |
-
# Choose model size: "small", "medium", "large-v2", etc. Smaller -> lower latency.
|
| 37 |
-
whisper_model_size = "small"
|
| 38 |
-
whisper = WhisperModel(whisper_model_size, device=DEVICE, compute_type="int8_float16") # compute_type optional based on your hardware
|
| 39 |
-
|
| 40 |
-
# Sentiment / conversation cue analysis
|
| 41 |
-
sentiment = pipeline("sentiment-analysis", device=0 if DEVICE == "cuda" else -1)
|
| 42 |
-
|
| 43 |
-
# LLM (local fallback)
|
| 44 |
-
local_llm = None
|
| 45 |
-
local_tokenizer = None
|
| 46 |
-
if not USE_HOSTED_LLM:
|
| 47 |
-
try:
|
| 48 |
-
# Keep this small on Spaces by default. Swap to a bigger model if you have the hardware.
|
| 49 |
-
local_llm_name = LLM_MODEL_NAME # e.g., "gpt2", "EleutherAI/gpt-neo-125M", or an HF chat model
|
| 50 |
-
local_tokenizer = AutoTokenizer.from_pretrained(local_llm_name)
|
| 51 |
-
local_llm = AutoModelForCausalLM.from_pretrained(local_llm_name, device_map="auto" if DEVICE=="cuda" else None)
|
| 52 |
-
except Exception as e:
|
| 53 |
-
print("Failed to load local LLM:", e)
|
| 54 |
-
local_llm = None
|
| 55 |
-
|
| 56 |
-
# Hosted inference (optional)
|
| 57 |
-
hf_inference = None
|
| 58 |
-
if USE_HOSTED_LLM and HUGGINGFACEHUB_API_TOKEN:
|
| 59 |
-
hf_inference = InferenceApi(repo_id=os.environ.get("HF_INFERENCE_MODEL", "openai/gpt-oss-7b"), token=HUGGINGFACEHUB_API_TOKEN)
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
# TTS: This is an illustration. For actual use, replace with a tested TTS pipeline suited to the model.
|
| 63 |
-
def tts_generate(text: str, out_path: str) -> str:
|
| 64 |
-
"""
|
| 65 |
-
Generate TTS audio for `text` and save to out_path.
|
| 66 |
-
For Higgs Audio v2 (BosonAI) you likely need a specific inference client or HF pipeline that supports that repo.
|
| 67 |
-
This function is a stub illustrating where TTS would be called.
|
| 68 |
-
"""
|
| 69 |
-
# Option A: If the TTS repo provides a pipeline via transformers (pseudo):
|
| 70 |
-
# from transformers import pipeline
|
| 71 |
-
# tts_pipe = pipeline("text-to-speech", model=TTS_MODEL, use_auth_token=HUGGINGFACEHUB_API_TOKEN)
|
| 72 |
-
# audio_array, sr = tts_pipe(text)
|
| 73 |
-
# save audio to out_path here...
|
| 74 |
-
#
|
| 75 |
-
# Option B: If using hosted HF inference:
|
| 76 |
-
if HUGGINGFACEHUB_API_TOKEN and os.environ.get("USE_HOSTED_TTS"):
|
| 77 |
-
# Using HF Inference API (simplified)
|
| 78 |
-
from huggingface_hub import InferenceApi
|
| 79 |
-
inf = InferenceApi(repo_id=TTS_MODEL, token=HUGGINGFACEHUB_API_TOKEN)
|
| 80 |
-
res = inf(text, {"wait_for_model": True})
|
| 81 |
-
# hf returns audio in base64 or a URL depending on model; handle accordingly (pseudo)
|
| 82 |
-
# Save binary to out_path...
|
| 83 |
-
# Fallback: return an empty audio or a short beep (silence)
|
| 84 |
-
# For demo, create a tiny silent WAV so Gradio can play something:
|
| 85 |
-
import wave, struct
|
| 86 |
-
with wave.open(out_path, "wb") as wf:
|
| 87 |
-
n_channels = 1
|
| 88 |
-
sampwidth = 2
|
| 89 |
-
framerate = 16000
|
| 90 |
-
n_frames = framerate // 10 # 0.1s silence
|
| 91 |
-
wf.setnchannels(n_channels)
|
| 92 |
-
wf.setsampwidth(sampwidth)
|
| 93 |
-
wf.setframerate(framerate)
|
| 94 |
-
wf.writeframes(b"\x00\x00" * n_frames)
|
| 95 |
-
return out_path
|
| 96 |
-
|
| 97 |
-
# --------------------
|
| 98 |
-
# Core pipeline functions
|
| 99 |
-
# --------------------
|
| 100 |
-
|
| 101 |
-
def transcribe_audio(audio_file: str):
|
| 102 |
-
"""
|
| 103 |
-
audio_file: path to uploaded audio (wav/m4a/ogg)
|
| 104 |
-
returns: transcription string and whisper segments (list)
|
| 105 |
-
"""
|
| 106 |
-
segments = []
|
| 107 |
-
text_acc = []
|
| 108 |
-
# faster-whisper: transcribe file
|
| 109 |
-
# model.transcribe returns generator of segments when streaming; we call full transcription here
|
| 110 |
-
try:
|
| 111 |
-
result = whisper.transcribe(audio_file, beam_size=5, word_timestamps=False)
|
| 112 |
-
# result is a list of segments or object depending on version. We'll handle simple case:
|
| 113 |
-
# If result has 'segments' attribute (older/variant) handle it:
|
| 114 |
-
if hasattr(result, "segments"):
|
| 115 |
-
for seg in result.segments:
|
| 116 |
-
segments.append({"start": seg.start, "end": seg.end, "text": seg.text})
|
| 117 |
-
text_acc.append(seg.text)
|
| 118 |
-
else:
|
| 119 |
-
# result may be tuple/text depending on faster-whisper version
|
| 120 |
-
if isinstance(result, dict) and "text" in result:
|
| 121 |
-
text_acc.append(result["text"])
|
| 122 |
-
elif isinstance(result, str):
|
| 123 |
-
text_acc.append(result)
|
| 124 |
-
except Exception as e:
|
| 125 |
-
print("STT error:", e)
|
| 126 |
-
return "",""
|
| 127 |
|
| 128 |
-
|
| 129 |
-
return transcription, segments
|
| 130 |
|
| 131 |
-
def
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
Returns text response for prompt. Uses hosted HF inference if USE_HOSTED_LLM else local model.
|
| 144 |
-
conversation_history: list of (user, assistant) tuples (not deeply used in demo)
|
| 145 |
-
"""
|
| 146 |
-
if USE_HOSTED_LLM and hf_inference:
|
| 147 |
-
# Use HF Inference API (simplified)
|
| 148 |
-
payload = {"inputs": prompt}
|
| 149 |
-
try:
|
| 150 |
-
result = hf_inference(prompt)
|
| 151 |
-
# result could be a dict with 'generated_text' or a string
|
| 152 |
-
if isinstance(result, dict) and "generated_text" in result:
|
| 153 |
-
return result["generated_text"]
|
| 154 |
-
elif isinstance(result, str):
|
| 155 |
-
return result
|
| 156 |
-
else:
|
| 157 |
-
return str(result)
|
| 158 |
-
except Exception as e:
|
| 159 |
-
print("Hosted LLM error:", e)
|
| 160 |
-
return "Sorry — I couldn't reach the hosted model."
|
| 161 |
-
elif local_llm and local_tokenizer:
|
| 162 |
-
# Local generation (small models suitable for Spaces)
|
| 163 |
-
inputs = local_tokenizer(prompt, return_tensors="pt").to(DEVICE)
|
| 164 |
-
outputs = local_llm.generate(**inputs, max_new_tokens=max_length)
|
| 165 |
-
return local_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 166 |
else:
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
-
# --------------------
|
| 170 |
-
# Gradio UI callbacks
|
| 171 |
-
# --------------------
|
| 172 |
|
| 173 |
-
|
| 174 |
-
""
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
"""
|
| 178 |
-
if audio is None:
|
| 179 |
-
return "","", "No audio received.", None
|
| 180 |
-
# gradio audio file can be a dict or tuple; handle both
|
| 181 |
-
if isinstance(audio, tuple) or isinstance(audio, list):
|
| 182 |
-
# (sample_rate, numpy_array) OR (path, sample_rate)
|
| 183 |
-
# gradio sends a path when type="filepath"
|
| 184 |
-
audio_path = audio[0] if isinstance(audio[0], str) else None
|
| 185 |
-
elif isinstance(audio, str) and Path(audio).exists():
|
| 186 |
-
audio_path = audio
|
| 187 |
-
else:
|
| 188 |
-
# try to handle (np_array, sr)
|
| 189 |
-
import soundfile as sf
|
| 190 |
-
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 191 |
-
sf.write(tmp.name, audio[0], audio[1])
|
| 192 |
-
audio_path = tmp.name
|
| 193 |
-
|
| 194 |
-
transcription, segments = transcribe_audio(audio_path)
|
| 195 |
-
sentiment_res = analyze_sentiment(transcription)
|
| 196 |
-
sentiment_label = sentiment_res.get("label", "UNKNOWN")
|
| 197 |
-
# Build a prompt for LLM
|
| 198 |
-
prompt = f"User: {transcription}\n\nAssistant:"
|
| 199 |
-
assistant_text = llm_respond(prompt)
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
-
return transcription, sentiment_label, assistant_text, out_audio_path
|
| 206 |
|
| 207 |
-
# --------------------
|
| 208 |
-
# GRADIO APP
|
| 209 |
-
# --------------------
|
| 210 |
-
with gr.Blocks(title="Voice Assistant (Faster Whisper + LLM + Higgs Audio)") as demo:
|
| 211 |
-
gr.Markdown("# Voice Assistant demo")
|
| 212 |
-
with gr.Row():
|
| 213 |
-
with gr.Column(scale=2):
|
| 214 |
-
audio_in = gr.Audio(source="microphone", type="filepath", label="Speak (record)", show_label=True)
|
| 215 |
-
btn = gr.Button("Send")
|
| 216 |
-
status = gr.Label("Status: idle")
|
| 217 |
-
with gr.Column(scale=3):
|
| 218 |
-
transcript_out = gr.Textbox(label="Transcription", interactive=False)
|
| 219 |
-
sentiment_out = gr.Textbox(label="Sentiment", interactive=False)
|
| 220 |
-
assistant_out = gr.Textbox(label="Assistant response", interactive=False)
|
| 221 |
-
audio_out = gr.Audio(label="Assistant voice", interactive=False)
|
| 222 |
|
| 223 |
-
def on_click(audio):
|
| 224 |
-
status.value = "Processing..."
|
| 225 |
-
transcription, sentiment_label, assistant_text, tts_path = process_audio_and_respond(audio)
|
| 226 |
-
status.value = "Done"
|
| 227 |
-
return gr.update(value=transcription), gr.update(value=sentiment_label), gr.update(value=assistant_text), gr.update(value=tts_path)
|
| 228 |
|
| 229 |
-
btn.click(on_click, inputs=[audio_in], outputs=[transcript_out, sentiment_out, assistant_out, audio_out])
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
"- Set `USE_HOSTED_LLM=1` & provide `HUGGINGFACEHUB_API_TOKEN` to use a hosted LLM (e.g. gpt-oss variants) via the HF Inference API.\n"
|
| 235 |
-
"- To enable richer TTS (Higgs Audio v2), set `USE_HOSTED_TTS=1` and make sure your HF token has access to the model if it's gated.\n"
|
| 236 |
-
)
|
| 237 |
|
| 238 |
-
if __name__ == '__main__':
    # Bind to all interfaces so the hosting container can route traffic to the
    # app; honor the platform-supplied PORT env var (7860 = Gradio's default).
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
from faster_whisper import WhisperModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
# Smallest English-only faster-whisper checkpoint, loaded once at module
# import so the first request does not pay the model-load latency.
model = WhisperModel("tiny.en")
|
|
|
|
| 6 |
|
| 7 |
+
def normaliseData(audioInput, stream):
    """Normalise one incoming audio chunk and append it to the running buffer.

    Parameters
    ----------
    audioInput : tuple[int, np.ndarray]
        ``(sample_rate, samples)`` pair as delivered by
        ``gr.Audio(streaming=True)``. Samples may be integer PCM and may be
        stereo (2-D) — TODO confirm against the gradio version in use.
    stream : np.ndarray | None
        Accumulated float32 samples from previous chunks, or ``None`` on the
        first chunk (the initial ``gr.State()`` value).

    Returns
    -------
    tuple[np.ndarray, str]
        The updated buffer (stored back into state) and its string
        representation for display in the textbox.
    """
    sr, y = audioInput  # sr is currently unused; kept for the gradio tuple shape

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Guard against silent/empty chunks: dividing
    # by a zero peak fills the buffer with NaN/inf, which then poisons every
    # later np.concatenate, and np.max raises on an empty array.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Return the stream as state and a string representation of the array for display
    return stream, str(stream)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
# --- Gradio UI: stream microphone chunks through normaliseData ------------
with gr.Blocks() as demo:
    # Microphone source delivers (sample_rate, ndarray) chunks continuously.
    mic = gr.Audio(sources=["microphone"], streaming=True)
    display = gr.Textbox(label="Output")
    # Holds the accumulated float32 sample buffer between chunk callbacks.
    buffer_state = gr.State()

    # Each incoming chunk is folded into the state; the textbox shows the
    # current buffer's repr. Output order matters: state first, text second.
    mic.stream(
        normaliseData,
        inputs=[mic, buffer_state],
        outputs=[buffer_state, display],
    )
demo.launch()
|
| 37 |
|
|
|
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
|
|
|
| 41 |
|
| 42 |
+
# Smoke test of the Whisper model against a local file. NOTE(review): this
# sits after demo.launch() at module level, so it only runs once launch
# returns (behavior depends on the gradio version/environment — confirm), and
# it requires "audio.mp3" to exist next to the script. Verify this leftover
# snippet is intentional.
segments, info = model.transcribe("audio.mp3")
for segment in segments:
    # faster-whisper yields segments lazily; iterating/printing forces decoding.
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
|
|
working.py
CHANGED
|
@@ -1,28 +1,28 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
|
| 28 |
-
|
|
|
|
| 1 |
+
# ASR pipeline (Whisper base.en). NOTE(review): the `pipeline` import is not
# visible in this chunk — confirm `from transformers import pipeline` exists
# at the top of working.py. `transcriber` is never called by transcribe() below.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
|
| 2 |
|
| 3 |
+
def transcribe(stream, new_chunk):
    """Accumulate one streamed microphone chunk into ``stream``.

    NOTE(review): despite its name this function only normalises and buffers
    audio — it never calls the ``transcriber`` pipeline defined above; confirm
    whether ASR was meant to happen here.

    Parameters:
        stream: running float32 buffer from previous chunks, or ``None`` on
            the first call (initial "state" value).
        new_chunk: ``(sample_rate, samples)`` tuple from
            ``gr.Audio(streaming=True)``; samples may be int PCM and stereo.

    Returns:
        ``(stream, str(stream))``: the updated buffer for state, plus its
        string representation for the text output.
    """
    sr, y = new_chunk  # sr currently unused

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise; skip the division when the chunk is silent or empty so
    # we never introduce NaN/inf into the accumulated buffer (np.max would
    # also raise on an empty array).
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Return the stream as state and a string representation of the array for display
    return stream, str(stream)
|
| 20 |
|
| 21 |
+
# Live streaming interface: microphone chunks are piped through `transcribe`,
# with the first input/output slot acting as persistent per-session state.
demo = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    live=True,
)

demo.launch()
|