Scrapyard commited on
Commit
acbd561
·
1 Parent(s): f6b199b

have it working as a block

Browse files
Files changed (5) hide show
  1. =2.0 +0 -191
  2. =3.30 +0 -0
  3. =4.30 +0 -0
  4. app.py +31 -225
  5. working.py +21 -21
=2.0 DELETED
@@ -1,191 +0,0 @@
1
- Requirement already satisfied: gradio in /usr/local/lib/python3.10/site-packages (5.41.0)
2
- Collecting transformers
3
- Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)
4
- Collecting torch
5
- Downloading torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (30 kB)
6
- Collecting faster-whisper
7
- Downloading faster_whisper-1.2.0-py3-none-any.whl.metadata (16 kB)
8
- Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/site-packages (0.34.3)
9
- Requirement already satisfied: numpy in /usr/local/lib/python3.10/site-packages (2.2.6)
10
- Collecting scipy
11
- Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
12
- Collecting soundfile
13
- Downloading soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl.metadata (16 kB)
14
- Collecting ffmpeg-python
15
- Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
16
- Requirement already satisfied: aiofiles<25.0,>=22.0 in /usr/local/lib/python3.10/site-packages (from gradio) (24.1.0)
17
- Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/site-packages (from gradio) (4.10.0)
18
- Requirement already satisfied: brotli>=1.1.0 in /usr/local/lib/python3.10/site-packages (from gradio) (1.1.0)
19
- Requirement already satisfied: fastapi<1.0,>=0.115.2 in /usr/local/lib/python3.10/site-packages (from gradio) (0.116.1)
20
- Requirement already satisfied: ffmpy in /usr/local/lib/python3.10/site-packages (from gradio) (0.6.1)
21
- Requirement already satisfied: gradio-client==1.11.0 in /usr/local/lib/python3.10/site-packages (from gradio) (1.11.0)
22
- Requirement already satisfied: groovy~=0.1 in /usr/local/lib/python3.10/site-packages (from gradio) (0.1.2)
23
- Requirement already satisfied: httpx<1.0,>=0.24.1 in /usr/local/lib/python3.10/site-packages (from gradio) (0.28.1)
24
- Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/site-packages (from gradio) (3.1.6)
25
- Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.10/site-packages (from gradio) (3.0.2)
26
- Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.10/site-packages (from gradio) (3.11.1)
27
- Requirement already satisfied: packaging in /usr/local/lib/python3.10/site-packages (from gradio) (25.0)
28
- Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/site-packages (from gradio) (2.3.1)
29
- Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.10/site-packages (from gradio) (11.3.0)
30
- Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.10/site-packages (from gradio) (2.11.7)
31
- Requirement already satisfied: pydub in /usr/local/lib/python3.10/site-packages (from gradio) (0.25.1)
32
- Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.10/site-packages (from gradio) (0.0.20)
33
- Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/site-packages (from gradio) (6.0.2)
34
- Requirement already satisfied: ruff>=0.9.3 in /usr/local/lib/python3.10/site-packages (from gradio) (0.12.7)
35
- Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in /usr/local/lib/python3.10/site-packages (from gradio) (0.1.6)
36
- Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.10/site-packages (from gradio) (2.10.0)
37
- Requirement already satisfied: starlette<1.0,>=0.40.0 in /usr/local/lib/python3.10/site-packages (from gradio) (0.47.2)
38
- Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in /usr/local/lib/python3.10/site-packages (from gradio) (0.13.3)
39
- Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/site-packages (from gradio) (0.16.0)
40
- Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/site-packages (from gradio) (4.14.1)
41
- Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.10/site-packages (from gradio) (0.35.0)
42
- Requirement already satisfied: fsspec in /usr/local/lib/python3.10/site-packages (from gradio-client==1.11.0->gradio) (2025.3.0)
43
- Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.10/site-packages (from gradio-client==1.11.0->gradio) (15.0.1)
44
- Requirement already satisfied: filelock in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (3.18.0)
45
- Requirement already satisfied: requests in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (2.32.4)
46
- Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (4.67.1)
47
- Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.10/site-packages (from huggingface-hub) (1.1.7)
48
- Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (1.3.0)
49
- Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (3.10)
50
- Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)
51
- Requirement already satisfied: certifi in /usr/local/lib/python3.10/site-packages (from httpx<1.0,>=0.24.1->gradio) (2025.8.3)
52
- Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/site-packages (from httpx<1.0,>=0.24.1->gradio) (1.0.9)
53
- Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.10/site-packages (from httpcore==1.*->httpx<1.0,>=0.24.1->gradio) (0.16.0)
54
- Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)
55
- Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2025.2)
56
- Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2025.2)
57
- Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/site-packages (from pydantic<2.12,>=2.0->gradio) (0.7.0)
58
- Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.10/site-packages (from pydantic<2.12,>=2.0->gradio) (2.33.2)
59
- Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.10/site-packages (from pydantic<2.12,>=2.0->gradio) (0.4.1)
60
- Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (8.0.4)
61
- Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (1.5.4)
62
- Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (14.1.0)
63
- Collecting regex!=2019.12.17 (from transformers)
64
- Downloading regex-2025.7.34-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
65
- Collecting tokenizers<0.22,>=0.21 (from transformers)
66
- Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
67
- Collecting safetensors>=0.4.3 (from transformers)
68
- Downloading safetensors-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
69
- Collecting sympy>=1.13.3 (from torch)
70
- Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
71
- Collecting networkx (from torch)
72
- Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
73
- Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
74
- Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
75
- Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
76
- Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
77
- Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
78
- Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
79
- Collecting nvidia-cudnn-cu12==9.10.2.21 (from torch)
80
- Downloading nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
81
- Collecting nvidia-cublas-cu12==12.8.4.1 (from torch)
82
- Downloading nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
83
- Collecting nvidia-cufft-cu12==11.3.3.83 (from torch)
84
- Downloading nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
85
- Collecting nvidia-curand-cu12==10.3.9.90 (from torch)
86
- Downloading nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
87
- Collecting nvidia-cusolver-cu12==11.7.3.90 (from torch)
88
- Downloading nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
89
- Collecting nvidia-cusparse-cu12==12.5.8.93 (from torch)
90
- Downloading nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)
91
- Collecting nvidia-cusparselt-cu12==0.7.1 (from torch)
92
- Downloading nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl.metadata (7.0 kB)
93
- Collecting nvidia-nccl-cu12==2.27.3 (from torch)
94
- Downloading nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.0 kB)
95
- Collecting nvidia-nvtx-cu12==12.8.90 (from torch)
96
- Downloading nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)
97
- Collecting nvidia-nvjitlink-cu12==12.8.93 (from torch)
98
- Downloading nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
99
- Collecting nvidia-cufile-cu12==1.13.1.3 (from torch)
100
- Downloading nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
101
- Collecting triton==3.4.0 (from torch)
102
- Downloading triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.7 kB)
103
- Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.10/site-packages (from triton==3.4.0->torch) (65.5.1)
104
- Collecting ctranslate2<5,>=4.0 (from faster-whisper)
105
- Downloading ctranslate2-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
106
- Collecting onnxruntime<2,>=1.14 (from faster-whisper)
107
- Downloading onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
108
- Collecting av>=11 (from faster-whisper)
109
- Downloading av-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.6 kB)
110
- Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)
111
- Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
112
- Collecting flatbuffers (from onnxruntime<2,>=1.14->faster-whisper)
113
- Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
114
- Requirement already satisfied: protobuf in /usr/local/lib/python3.10/site-packages (from onnxruntime<2,>=1.14->faster-whisper) (3.20.3)
115
- Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/site-packages (from soundfile) (1.17.1)
116
- Collecting future (from ffmpeg-python)
117
- Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
118
- Requirement already satisfied: pycparser in /usr/local/lib/python3.10/site-packages (from cffi>=1.0->soundfile) (2.22)
119
- Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)
120
- Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)
121
- Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.19.2)
122
- Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)
123
- Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
124
- Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
125
- Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)
126
- Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
127
- Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub) (3.4.2)
128
- Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub) (2.5.0)
129
- Downloading transformers-4.55.0-py3-none-any.whl (11.3 MB)
130
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.3/11.3 MB 157.2 MB/s 0:00:00
131
- Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
132
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 168.3 MB/s 0:00:00
133
- Downloading torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl (888.0 MB)
134
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 888.0/888.0 MB 105.1 MB/s 0:00:06
135
- Downloading nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl (594.3 MB)
136
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 594.3/594.3 MB 258.5 MB/s 0:00:01
137
- Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (10.2 MB)
138
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.2/10.2 MB 226.6 MB/s 0:00:00
139
- Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (88.0 MB)
140
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 88.0/88.0 MB 378.9 MB/s 0:00:00
141
- Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (954 kB)
142
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 954.8/954.8 kB 69.2 MB/s 0:00:00
143
- Downloading nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl (706.8 MB)
144
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 706.8/706.8 MB 286.2 MB/s 0:00:01
145
- Downloading nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (193.1 MB)
146
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 193.1/193.1 MB 163.9 MB/s 0:00:01
147
- Downloading nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.2 MB)
148
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 89.5 MB/s 0:00:00
149
- Downloading nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl (63.6 MB)
150
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 63.6/63.6 MB 303.5 MB/s 0:00:00
151
- Downloading nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl (267.5 MB)
152
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 267.5/267.5 MB 265.9 MB/s 0:00:00
153
- Downloading nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (288.2 MB)
154
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 288.2/288.2 MB 383.7 MB/s 0:00:00
155
- Downloading nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl (287.2 MB)
156
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 287.2/287.2 MB 389.0 MB/s 0:00:00
157
- Downloading nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (322.4 MB)
158
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 322.4/322.4 MB 298.2 MB/s 0:00:01
159
- Downloading nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.3 MB)
160
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.3/39.3 MB 378.8 MB/s 0:00:00
161
- Downloading nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89 kB)
162
- Downloading triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (155.4 MB)
163
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 155.4/155.4 MB 163.6 MB/s 0:00:00
164
- Downloading faster_whisper-1.2.0-py3-none-any.whl (1.1 MB)
165
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 76.0 MB/s 0:00:00
166
- Downloading ctranslate2-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)
167
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 38.4/38.4 MB 161.9 MB/s 0:00:00
168
- Downloading onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.5 MB)
169
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.5/16.5 MB 163.1 MB/s 0:00:00
170
- Downloading scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7 MB)
171
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 37.7/37.7 MB 395.4 MB/s 0:00:00
172
- Downloading soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl (1.3 MB)
173
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 33.6 MB/s 0:00:00
174
- Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
175
- Downloading av-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.2 MB)
176
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.2/39.2 MB 163.0 MB/s 0:00:00
177
- Downloading regex-2025.7.34-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (789 kB)
178
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 789.8/789.8 kB 50.5 MB/s 0:00:00
179
- Downloading safetensors-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
180
- Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
181
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 209.0 MB/s 0:00:00
182
- Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
183
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 kB 18.0 MB/s 0:00:00
184
- Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
185
- Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
186
- Downloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)
187
- Downloading future-1.0.0-py3-none-any.whl (491 kB)
188
- Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
189
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 129.1 MB/s 0:00:00
190
- Installing collected packages: nvidia-cusparselt-cu12, mpmath, flatbuffers, triton, sympy, scipy, safetensors, regex, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufile-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, networkx, humanfriendly, future, ctranslate2, av, soundfile, nvidia-cusparse-cu12, nvidia-cufft-cu12, nvidia-cudnn-cu12, ffmpeg-python, coloredlogs, tokenizers, onnxruntime, nvidia-cusolver-cu12, transformers, torch, faster-whisper
191
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
=3.30 DELETED
File without changes
=4.30 DELETED
File without changes
app.py CHANGED
@@ -1,239 +1,45 @@
1
- # app.py
2
- # Minimal Gradio voice-assistant demo for Hugging Face Spaces.
3
- # - STT: faster-whisper (WhisperModel)
4
- # - Sentiment: transformers pipeline
5
- # - LLM: local HF model (small) or HF Inference API (recommended for huge models)
6
- # - TTS: bosonai/higgs-audio-v2 (when available)
7
- #
8
- # NOTES:
9
- # - Replace model names with versions you prefer.
10
- # - If you want streaming / low-latency WebRTC, see the "Advanced: WebRTC / streaming" comments below.
11
-
12
- import os
13
- import tempfile
14
- import uuid
15
- from pathlib import Path
16
- from faster_whisper import WhisperModel
17
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
18
- import torch
19
  import gradio as gr
20
- from huggingface_hub import InferenceApi, hf_hub_download
21
-
22
- # --------------------
23
- # CONFIG / ENV VARS
24
- # --------------------
25
- HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN") # optional for hosted inference
26
- USE_HOSTED_LLM = bool(os.environ.get("USE_HOSTED_LLM", "")) # set to "1" to use hosted HF inference instead of a local model
27
- LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME", "gpt2") # default small model for Spaces demo
28
- TTS_MODEL = os.environ.get("TTS_MODEL", "bosonai/higgs-audio-v2-generation-3B-base") # example; may need HF token
29
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
30
-
31
- # --------------------
32
- # Initialize models
33
- # --------------------
34
-
35
- # Faster Whisper STT
36
- # Choose model size: "small", "medium", "large-v2", etc. Smaller -> lower latency.
37
- whisper_model_size = "small"
38
- whisper = WhisperModel(whisper_model_size, device=DEVICE, compute_type="int8_float16") # compute_type optional based on your hardware
39
-
40
- # Sentiment / conversation cue analysis
41
- sentiment = pipeline("sentiment-analysis", device=0 if DEVICE == "cuda" else -1)
42
-
43
- # LLM (local fallback)
44
- local_llm = None
45
- local_tokenizer = None
46
- if not USE_HOSTED_LLM:
47
- try:
48
- # Keep this small on Spaces by default. Swap to a bigger model if you have the hardware.
49
- local_llm_name = LLM_MODEL_NAME # e.g., "gpt2", "EleutherAI/gpt-neo-125M", or an HF chat model
50
- local_tokenizer = AutoTokenizer.from_pretrained(local_llm_name)
51
- local_llm = AutoModelForCausalLM.from_pretrained(local_llm_name, device_map="auto" if DEVICE=="cuda" else None)
52
- except Exception as e:
53
- print("Failed to load local LLM:", e)
54
- local_llm = None
55
-
56
- # Hosted inference (optional)
57
- hf_inference = None
58
- if USE_HOSTED_LLM and HUGGINGFACEHUB_API_TOKEN:
59
- hf_inference = InferenceApi(repo_id=os.environ.get("HF_INFERENCE_MODEL", "openai/gpt-oss-7b"), token=HUGGINGFACEHUB_API_TOKEN)
60
-
61
-
62
- # TTS: This is an illustration. For actual use, replace with a tested TTS pipeline suited to the model.
63
- def tts_generate(text: str, out_path: str) -> str:
64
- """
65
- Generate TTS audio for `text` and save to out_path.
66
- For Higgs Audio v2 (BosonAI) you likely need a specific inference client or HF pipeline that supports that repo.
67
- This function is a stub illustrating where TTS would be called.
68
- """
69
- # Option A: If the TTS repo provides a pipeline via transformers (pseudo):
70
- # from transformers import pipeline
71
- # tts_pipe = pipeline("text-to-speech", model=TTS_MODEL, use_auth_token=HUGGINGFACEHUB_API_TOKEN)
72
- # audio_array, sr = tts_pipe(text)
73
- # save audio to out_path here...
74
- #
75
- # Option B: If using hosted HF inference:
76
- if HUGGINGFACEHUB_API_TOKEN and os.environ.get("USE_HOSTED_TTS"):
77
- # Using HF Inference API (simplified)
78
- from huggingface_hub import InferenceApi
79
- inf = InferenceApi(repo_id=TTS_MODEL, token=HUGGINGFACEHUB_API_TOKEN)
80
- res = inf(text, {"wait_for_model": True})
81
- # hf returns audio in base64 or a URL depending on model; handle accordingly (pseudo)
82
- # Save binary to out_path...
83
- # Fallback: return an empty audio or a short beep (silence)
84
- # For demo, create a tiny silent WAV so Gradio can play something:
85
- import wave, struct
86
- with wave.open(out_path, "wb") as wf:
87
- n_channels = 1
88
- sampwidth = 2
89
- framerate = 16000
90
- n_frames = framerate // 10 # 0.1s silence
91
- wf.setnchannels(n_channels)
92
- wf.setsampwidth(sampwidth)
93
- wf.setframerate(framerate)
94
- wf.writeframes(b"\x00\x00" * n_frames)
95
- return out_path
96
-
97
- # --------------------
98
- # Core pipeline functions
99
- # --------------------
100
-
101
- def transcribe_audio(audio_file: str):
102
- """
103
- audio_file: path to uploaded audio (wav/m4a/ogg)
104
- returns: transcription string and whisper segments (list)
105
- """
106
- segments = []
107
- text_acc = []
108
- # faster-whisper: transcribe file
109
- # model.transcribe returns generator of segments when streaming; we call full transcription here
110
- try:
111
- result = whisper.transcribe(audio_file, beam_size=5, word_timestamps=False)
112
- # result is a list of segments or object depending on version. We'll handle simple case:
113
- # If result has 'segments' attribute (older/variant) handle it:
114
- if hasattr(result, "segments"):
115
- for seg in result.segments:
116
- segments.append({"start": seg.start, "end": seg.end, "text": seg.text})
117
- text_acc.append(seg.text)
118
- else:
119
- # result may be tuple/text depending on faster-whisper version
120
- if isinstance(result, dict) and "text" in result:
121
- text_acc.append(result["text"])
122
- elif isinstance(result, str):
123
- text_acc.append(result)
124
- except Exception as e:
125
- print("STT error:", e)
126
- return "",""
127
 
128
- transcription = " ".join(text_acc).strip()
129
- return transcription, segments
130
 
131
- def analyze_sentiment(text: str):
132
- if not text:
133
- return {"label":"NEUTRAL", "score": 0.0}
134
- try:
135
- out = sentiment(text[:1000]) # limit to first 1000 chars
136
- return out[0] if isinstance(out, list) and out else out
137
- except Exception as e:
138
- print("Sentiment error:", e)
139
- return {"label":"UNKNOWN", "score": 0.0}
140
 
141
- def llm_respond(prompt: str, conversation_history=None, max_length=256):
142
- """
143
- Returns text response for prompt. Uses hosted HF inference if USE_HOSTED_LLM else local model.
144
- conversation_history: list of (user, assistant) tuples (not deeply used in demo)
145
- """
146
- if USE_HOSTED_LLM and hf_inference:
147
- # Use HF Inference API (simplified)
148
- payload = {"inputs": prompt}
149
- try:
150
- result = hf_inference(prompt)
151
- # result could be a dict with 'generated_text' or a string
152
- if isinstance(result, dict) and "generated_text" in result:
153
- return result["generated_text"]
154
- elif isinstance(result, str):
155
- return result
156
- else:
157
- return str(result)
158
- except Exception as e:
159
- print("Hosted LLM error:", e)
160
- return "Sorry — I couldn't reach the hosted model."
161
- elif local_llm and local_tokenizer:
162
- # Local generation (small models suitable for Spaces)
163
- inputs = local_tokenizer(prompt, return_tensors="pt").to(DEVICE)
164
- outputs = local_llm.generate(**inputs, max_new_tokens=max_length)
165
- return local_tokenizer.decode(outputs[0], skip_special_tokens=True)
166
  else:
167
- return "No LLM available. Set USE_HOSTED_LLM=1 and provide a HUGGINGFACEHUB_API_TOKEN or load a local model."
 
 
 
168
 
169
- # --------------------
170
- # Gradio UI callbacks
171
- # --------------------
172
 
173
- def process_audio_and_respond(audio):
174
- """
175
- audio: a tuple or path from gradio Audio component
176
- Returns: transcription, sentiment label, assistant text, tts_audio_path
177
- """
178
- if audio is None:
179
- return "","", "No audio received.", None
180
- # gradio audio file can be a dict or tuple; handle both
181
- if isinstance(audio, tuple) or isinstance(audio, list):
182
- # (sample_rate, numpy_array) OR (path, sample_rate)
183
- # gradio sends a path when type="filepath"
184
- audio_path = audio[0] if isinstance(audio[0], str) else None
185
- elif isinstance(audio, str) and Path(audio).exists():
186
- audio_path = audio
187
- else:
188
- # try to handle (np_array, sr)
189
- import soundfile as sf
190
- tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
191
- sf.write(tmp.name, audio[0], audio[1])
192
- audio_path = tmp.name
193
-
194
- transcription, segments = transcribe_audio(audio_path)
195
- sentiment_res = analyze_sentiment(transcription)
196
- sentiment_label = sentiment_res.get("label", "UNKNOWN")
197
- # Build a prompt for LLM
198
- prompt = f"User: {transcription}\n\nAssistant:"
199
- assistant_text = llm_respond(prompt)
200
 
201
- # Synthesize TTS (non-blocking improvement: spawn background job)
202
- out_audio_path = f"/tmp/assistant_tts_{uuid.uuid4().hex}.wav"
203
- tts_generate(assistant_text, out_audio_path)
 
 
 
204
 
205
- return transcription, sentiment_label, assistant_text, out_audio_path
206
 
207
- # --------------------
208
- # GRADIO APP
209
- # --------------------
210
- with gr.Blocks(title="Voice Assistant (Faster Whisper + LLM + Higgs Audio)") as demo:
211
- gr.Markdown("# Voice Assistant demo")
212
- with gr.Row():
213
- with gr.Column(scale=2):
214
- audio_in = gr.Audio(source="microphone", type="filepath", label="Speak (record)", show_label=True)
215
- btn = gr.Button("Send")
216
- status = gr.Label("Status: idle")
217
- with gr.Column(scale=3):
218
- transcript_out = gr.Textbox(label="Transcription", interactive=False)
219
- sentiment_out = gr.Textbox(label="Sentiment", interactive=False)
220
- assistant_out = gr.Textbox(label="Assistant response", interactive=False)
221
- audio_out = gr.Audio(label="Assistant voice", interactive=False)
222
 
223
- def on_click(audio):
224
- status.value = "Processing..."
225
- transcription, sentiment_label, assistant_text, tts_path = process_audio_and_respond(audio)
226
- status.value = "Done"
227
- return gr.update(value=transcription), gr.update(value=sentiment_label), gr.update(value=assistant_text), gr.update(value=tts_path)
228
 
229
- btn.click(on_click, inputs=[audio_in], outputs=[transcript_out, sentiment_out, assistant_out, audio_out])
230
 
231
- gr.Markdown(
232
- "### Notes\n"
233
- "- This demo uses Faster Whisper for STT, a transformers sentiment classifier, and a small LLM by default.\n"
234
- "- Set `USE_HOSTED_LLM=1` & provide `HUGGINGFACEHUB_API_TOKEN` to use a hosted LLM (e.g. gpt-oss variants) via the HF Inference API.\n"
235
- "- To enable richer TTS (Higgs Audio v2), set `USE_HOSTED_TTS=1` and make sure your HF token has access to the model if it's gated.\n"
236
- )
237
 
238
- if __name__ == '__main__':
239
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from faster_whisper import WhisperModel
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ model = WhisperModel("tiny.en")
 
6
 
7
def normaliseData(audioInput, stream):
    """Normalise an incoming audio chunk and append it to the running stream.

    Parameters
    ----------
    audioInput : tuple[int, np.ndarray]
        (sample_rate, samples) pair, as delivered by a streaming gr.Audio input.
    stream : np.ndarray | None
        Samples accumulated from previous chunks, or None on the first call.

    Returns
    -------
    tuple[np.ndarray, str]
        The updated stream (fed back into gr.State) and its string
        representation for display in the textbox.
    """
    sr, y = audioInput  # sr is unused here but kept: gradio delivers (rate, samples)

    # Convert to mono if stereo (average across channels).
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Guard against a silent (all-zero) or empty
    # chunk: the original unconditional `y /= np.max(np.abs(y))` divides by
    # zero on silence (NaN/inf in the stream) and raises on an empty array.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    # Append to the accumulated stream, or start a new one on the first chunk.
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Return the stream as state and a string representation of the array for display
    return stream, str(stream)
24
 
 
 
 
25
 
26
with gr.Blocks() as demo:
    # Streaming microphone input; each chunk gets folded into the buffer
    # that gr.State carries between calls.
    mic = gr.Audio(sources=["microphone"], streaming=True)
    display = gr.Textbox(label="Output")
    buffer_state = gr.State()

    # Every incoming chunk is normalised and appended to the accumulated
    # stream; the textbox mirrors the stream's string form.
    mic.stream(
        fn=normaliseData,
        inputs=[mic, buffer_state],
        outputs=[buffer_state, display],
    )

demo.launch()
37
 
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
 
 
 
 
40
 
 
41
 
42
# One-shot transcription of a local file: run the Whisper model over
# "audio.mp3" and print each recognised segment with its timestamps.
segments, info = model.transcribe("audio.mp3")
for seg in segments:
    print("[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text))
 
 
 
45
 
 
 
working.py CHANGED
@@ -1,28 +1,28 @@
1
- # transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
2
 
3
- # def transcribe(stream, new_chunk):
4
- # sr, y = new_chunk
5
 
6
- # # Convert to mono if stereo
7
- # if y.ndim > 1:
8
- # y = y.mean(axis=1)
9
 
10
- # y = y.astype(np.float32)
11
- # y /= np.max(np.abs(y))
12
 
13
- # if stream is not None:
14
- # stream = np.concatenate([stream, y])
15
- # else:
16
- # stream = y
17
 
18
- # # Return the stream as state and a string representation of the array for display
19
- # return stream, str(stream)
20
 
21
- # demo = gr.Interface(
22
- # transcribe,
23
- # ["state", gr.Audio(sources=["microphone"], streaming=True)],
24
- # ["state", "text"],
25
- # live=True,
26
- # )
27
 
28
- # demo.launch()
 
1
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
2
 
3
def transcribe(stream, new_chunk):
    """Fold a streaming audio chunk into the accumulated stream.

    Parameters
    ----------
    stream : np.ndarray | None
        Samples accumulated so far, or None on the first call.
    new_chunk : tuple[int, np.ndarray]
        (sample_rate, samples) pair from the streaming gr.Audio input.

    Returns
    -------
    tuple[np.ndarray, str]
        The updated stream (state) and its string form for display.
    """
    sr, y = new_chunk

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, guarding against a silent or empty chunk: dividing by
    # np.max(np.abs(y)) unconditionally yields NaN/inf on all-zero input and
    # raises ValueError on an empty array.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Return the stream as state and a string representation of the array for display
    return stream, str(stream)
20
 
21
# Live interface: the State slot carries the accumulated stream between
# calls; the streaming microphone supplies each new chunk.
demo = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    live=True,
)

demo.launch()