Nav3005 committed on
Commit
0536406
·
verified ·
1 Parent(s): 73672ed

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. .gitignore +35 -0
  3. README.md +2 -8
  4. app.py +196 -0
  5. audio.srt +40 -0
  6. audio.wav +3 -0
  7. requirements.txt +96 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ audio.wav filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ env/
6
+ venv/
7
+ ENV/
8
+ .venv/
9
+ build/
10
+ dist/
11
+ *.egg-info/
12
+ .DS_Store
13
+ .ipynb_checkpoints
14
+
15
+ .snapshots
16
+ .gradio
17
+ *.static/
18
+ templates/
19
+ uploads/
20
+ outputs/
21
+
22
+ # VSCode
23
+ .vscode/
24
+ .idea/
25
+ *.code-workspace
26
+ # MacOS
27
+ .DS_Store
28
+ # Logs
29
+ logs/
30
+ *.log
31
+ npm-debug.log*
32
+ yarn-debug.log*
33
+ yarn-error.log*
34
+ dump/
35
+
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Audio To Srt
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 6.4.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: audio-to-srt
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 6.4.0
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["OMP_NUM_THREADS"] = "1"
3
+ os.environ["MKL_NUM_THREADS"] = "1"
4
+
5
+ import gradio as gr
6
+ import pysrt
7
+ import requests
8
+ import tempfile
9
+ from faster_whisper import WhisperModel
10
+ from datetime import timedelta
11
+ from urllib.parse import urlparse
12
+
13
+
14
+ # -----------------------------
15
+ # Core subtitle generator
16
+ # -----------------------------
17
class FibonacciSubtitleGenerator:
    """Build SRT subtitles whose cue sizes (in words) follow 1, 2, 3, 5, 8, ...

    Audio is transcribed with a faster-whisper ``WhisperModel`` using word-level
    timestamps; the resulting words are then grouped into cues of growing size.
    """

    def __init__(self, model_size="base"):
        """Load the Whisper model named *model_size* on CPU with int8 compute."""
        self.model = WhisperModel(
            model_size,
            device="cpu",
            compute_type="int8",
        )

    def transcribe(self, audio_path):
        """Transcribe *audio_path* and return the model's segments iterable.

        Word timestamps and the VAD filter are both enabled. The second value
        returned by the model (transcription info) is discarded.
        """
        segments, _ = self.model.transcribe(
            audio_path,
            word_timestamps=True,
            vad_filter=True,
        )
        return segments

    def extract_words(self, segments):
        """Flatten *segments* into a list of ``{"word", "start", "end"}`` dicts.

        Segments without words, words missing a start or end time, and words
        whose text is empty after stripping whitespace are skipped, so every
        returned entry has displayable text and usable float timestamps.
        """
        words = []
        for segment in segments:
            if not segment.words:
                continue
            for w in segment.words:
                if w.start is None or w.end is None:
                    continue
                text = w.word.strip()
                if not text:
                    # A blank token would only add a stray space to the cue
                    # and use up one slot of the planned cue size.
                    continue
                words.append({
                    "word": text,
                    "start": float(w.start),
                    "end": float(w.end),
                })
        return words

    @staticmethod
    def _plan_chunk_sizes(total_words):
        """Return the number of words per cue for *total_words* words.

        Sizes follow 1, 2, 3, 5, 8, ... When the words left after the current
        cue are too few to fill the next Fibonacci-sized cue, they are absorbed
        into the current cue, which then becomes the last one. The sizes always
        sum to ``total_words``; zero or fewer words yields an empty list.
        """
        sizes = []
        prev_size, curr_size = 1, 1
        index = 0
        while index < total_words:
            next_size = prev_size + curr_size
            remaining = total_words - (index + curr_size)
            if 0 < remaining < next_size:
                # Leftovers cannot fill the next cue: absorb them and stop.
                sizes.append(curr_size + remaining)
                break
            size = min(curr_size, total_words - index)
            sizes.append(size)
            index += size
            prev_size, curr_size = curr_size, next_size
        return sizes

    def create_fibonacci_subtitles(self, words):
        """Group *words* into a ``pysrt.SubRipFile`` of Fibonacci-sized cues.

        *words* is a list of dicts as produced by :meth:`extract_words`. Each
        cue starts at its first word's start time and ends at its last word's
        end time; the cue text is the words joined by single spaces. An empty
        *words* list yields an empty subtitle file.
        """
        subs = pysrt.SubRipFile()
        position = 0
        sizes = self._plan_chunk_sizes(len(words))
        for subtitle_index, size in enumerate(sizes, start=1):
            chunk = words[position:position + size]
            position += size
            subs.append(
                pysrt.SubRipItem(
                    index=subtitle_index,
                    start=self._to_time(chunk[0]["start"]),
                    end=self._to_time(chunk[-1]["end"]),
                    text=" ".join(w["word"] for w in chunk),
                )
            )
        return subs

    @staticmethod
    def _split_timestamp(seconds):
        """Split *seconds* into ``(hours, minutes, seconds, milliseconds)``.

        Milliseconds are truncated, not rounded. Hours are not capped at 23, so
        positions at or beyond 24 hours keep their full hour count instead of
        wrapping around. Negative input is clamped to zero.
        """
        total_ms = max(timedelta(seconds=seconds) // timedelta(milliseconds=1), 0)
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        secs, millis = divmod(rest, 1_000)
        return hours, minutes, secs, millis

    def _to_time(self, seconds):
        """Convert a position in *seconds* to a ``pysrt.SubRipTime``."""
        hours, minutes, secs, millis = self._split_timestamp(seconds)
        return pysrt.SubRipTime(
            hours=hours,
            minutes=minutes,
            seconds=secs,
            milliseconds=millis,
        )
106
+
107
+
108
+ # -----------------------------
109
+ # Helper: download audio from URL
110
+ # -----------------------------
111
def download_audio(url: str) -> str:
    """Download the resource at *url* into a temporary file and return its path.

    The temporary file keeps the extension found in the URL path (falling back
    to ``.wav``) and is NOT deleted automatically; the caller owns it and is
    responsible for removing it.

    Raises:
        ValueError: if the URL scheme is not ``http`` or ``https``.
        requests.RequestException: on connection errors, timeouts, or an HTTP
            error status (via ``raise_for_status``).
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ValueError("Invalid URL scheme")

    suffix = os.path.splitext(parsed.path)[1] or ".wav"

    # NOTE(review): the URL is fetched server-side as given, so callers passing
    # user input can make this host reach internal addresses, and the download
    # size is unbounded. Consider host filtering / a size cap if that matters.
    # The context manager releases the streamed connection even on failure.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()

        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with tmp:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        tmp.write(chunk)
        except BaseException:
            # Do not leave a partial download behind when the transfer fails.
            try:
                os.remove(tmp.name)
            except OSError:
                pass
            raise

    return tmp.name
127
+
128
+
129
+ # -----------------------------
130
+ # Gradio callable function
131
+ # -----------------------------
132
def generate_srt(audio_file, audio_url, model_size):
    """Gradio callback: transcribe one audio input and return an ``.srt`` path.

    Args:
        audio_file: path of the uploaded audio file, or a falsy value when no
            file was provided.
        audio_url: http/https URL of an audio file, or empty/None when unused.
            Surrounding whitespace is ignored.
        model_size: Whisper model name passed to ``FibonacciSubtitleGenerator``.

    Returns:
        Path of a temporary UTF-8 ``.srt`` file containing the subtitles.

    Raises:
        gr.Error: if both or neither input is provided, or if the URL cannot
            be downloaded.
    """
    # A whitespace-only textbox value means "no URL", not a URL to download.
    audio_url = (audio_url or "").strip()

    # Validation: exactly one input must be provided
    if bool(audio_file) == bool(audio_url):
        raise gr.Error(
            "Please provide EITHER an audio file OR an audio URL (not both)."
        )

    # Resolve audio path; remember a download so it can be cleaned up later.
    downloaded_path = None
    if audio_url:
        try:
            downloaded_path = download_audio(audio_url)
        except (ValueError, requests.RequestException) as exc:
            # Surface download problems as a readable message in the UI.
            raise gr.Error(f"Could not download audio: {exc}") from exc
        audio_path = downloaded_path
    else:
        audio_path = audio_file

    try:
        generator = FibonacciSubtitleGenerator(model_size)
        segments = generator.transcribe(audio_path)
        # extract_words iterates the segments to completion, so the audio file
        # is not needed once it returns.
        words = generator.extract_words(segments)
        subs = generator.create_fibonacci_subtitles(words)
    finally:
        if downloaded_path is not None:
            try:
                os.remove(downloaded_path)
            except OSError:
                pass

    # Reserve a unique output path, then release our handle before pysrt
    # opens the same path for writing.
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
    out.close()
    subs.save(out.name, encoding="utf-8")

    return out.name
155
+
156
+
157
+ # -----------------------------
158
+ # Gradio UI
159
+ # -----------------------------
160
# Build the page. Components are created in display order inside the Blocks
# context, and `demo` is the app object launched at the bottom of the file.
with gr.Blocks(title="Fibonacci Subtitle Generator") as demo:
    # Page heading.
    gr.Markdown(
        """
        # srt generator
        """
    )

    # The two alternative audio sources sit side by side; generate_srt
    # rejects the request unless exactly one of them is filled in.
    with gr.Row():
        audio_file = gr.Audio(
            label="Upload Audio File",
            type="filepath"
        )

        audio_url = gr.Textbox(
            label="Audio URL (http/https)",
            placeholder="https://example.com/audio.wav"
        )

    # Whisper model name forwarded to generate_srt as `model_size`.
    model_choice = gr.Dropdown(
        choices=["tiny", "base", "small", "medium"],
        value="base",
        label="Whisper Model"
    )

    generate_btn = gr.Button("Generate SRT")

    # Receives the .srt path returned by generate_srt.
    output_file = gr.File(label="Download SRT")

    # Input order must match generate_srt(audio_file, audio_url, model_size).
    generate_btn.click(
        fn=generate_srt,
        inputs=[audio_file, audio_url, model_choice],
        outputs=output_file
    )


if __name__ == "__main__":
    # NOTE(review): share=True requests a public Gradio share link — confirm
    # this is intended wherever the script is run directly.
    demo.launch(share=True)
audio.srt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1
2
+ 00:00:00,000 --> 00:00:00,180
3
+ I
4
+
5
+ 2
6
+ 00:00:00,180 --> 00:00:00,920
7
+ spotted buried
8
+
9
+ 3
10
+ 00:00:00,920 --> 00:00:01,700
11
+ in the appendix
12
+
13
+ 4
14
+ 00:00:01,700 --> 00:00:03,460
15
+ proofs at Minotrew, a verb
16
+
17
+ 5
18
+ 00:00:03,460 --> 00:00:06,340
19
+ that leans backward, were, for three seconds it
20
+
21
+ 6
22
+ 00:00:06,340 --> 00:00:10,140
23
+ glows on my screen before the correction order coughs through the telescreen. The
24
+
25
+ 7
26
+ 00:00:10,140 --> 00:00:16,600
27
+ chute yawns like a mouth, my finger hovers over send, the page updates, the past is repaired, but the syllable burrows.
28
+
29
+ 8
30
+ 00:00:16,940 --> 00:00:25,440
31
+ In new speed there is no room to lean only to stand. I sign out, drift past the two minutes and slip into the pearl quarter where the air smells of soap and rain.
32
+
33
+ 9
34
+ 00:00:25,440 --> 00:00:41,460
35
+ In a junk shop window, a small glass round as an eye, a paperweight trapping a curl of pale paper and a bubble of air. I buy it for nothing I can afford. Back home I write it tiny on a torn scrap, were. I tilt the paperweight and the word distort swims, multiplies a
36
+
37
+ 10
38
+ 00:00:41,460 --> 00:01:18,000
39
+ whole tense blooms like a reef. Bells ring somewhere far off, names I almost remember, and for a moment the room brightens with the light without edges. Then the telescreen clears its throat and the memory hole exhales. I let the scrap go. Smoke eats it. The glass stays cool in my palm. Years later, in a quiet building that needs no slogans, a curator dusts a cracked paperweight labeled. Relic, airstrip one, ministry of truth, inside clings a browned curl of paper where an ink blot suggests a letter. The curator squints guesses a word that leans backward. She whispers it, an ordinary word, and the city's bells answer as if they always had.
40
+
audio.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec0fc316ff0d84bced1d9fcfb68fb0fd556746cc52477c303615ad6ae4e2d8f3
3
+ size 5005356
requirements.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ annotated-doc==0.0.4
3
+ annotated-types==0.7.0
4
+ anyio==4.12.1
5
+ attrs==25.4.0
6
+ av==16.1.0
7
+ blinker==1.9.0
8
+ brotli==1.2.0
9
+ certifi==2026.1.4
10
+ cffi==2.0.0
11
+ chardet==5.2.0
12
+ charset-normalizer==3.4.4
13
+ click==8.3.1
14
+ coloredlogs==15.0.1
15
+ cryptography==46.0.3
16
+ ctranslate2==4.6.3
17
+ fastapi==0.128.0
18
+ faster-whisper==1.2.1
19
+ ffmpeg-python==0.2.0
20
+ ffmpy==1.0.0
21
+ filelock==3.20.3
22
+ Flask==3.1.2
23
+ flatbuffers==25.12.19
24
+ fsspec==2026.1.0
25
+ future==1.0.0
26
+ gradio==6.4.0
27
+ gradio_client==2.0.3
28
+ groovy==0.1.2
29
+ h11==0.16.0
30
+ hf-xet==1.2.0
31
+ httpcore==1.0.9
32
+ httpx==0.28.1
33
+ httpx-sse==0.4.3
34
+ huggingface_hub==1.3.3
35
+ humanfriendly==10.0
36
+ idna==3.11
37
+ itsdangerous==2.2.0
38
+ Jinja2==3.1.6
39
+ jsonschema==4.26.0
40
+ jsonschema-specifications==2025.9.1
41
+ llvmlite==0.46.0
42
+ markdown-it-py==4.0.0
43
+ MarkupSafe==3.0.3
44
+ mcp==1.25.0
45
+ mdurl==0.1.2
46
+ more-itertools==10.8.0
47
+ mpmath==1.3.0
48
+ networkx==3.6.1
49
+ numba==0.63.1
50
+ numpy==2.3.5
51
+ onnxruntime==1.23.2
52
+ openai-whisper==20250625
53
+ orjson==3.11.5
54
+ packaging==26.0
55
+ pandas==2.3.3
56
+ pillow==12.1.0
57
+ protobuf==6.33.4
58
+ pycparser==3.0
59
+ pydantic==2.12.5
60
+ pydantic-settings==2.12.0
61
+ pydantic_core==2.41.5
62
+ pydub==0.25.1
63
+ Pygments==2.19.2
64
+ PyJWT==2.10.1
65
+ pysrt==1.1.2
66
+ python-dateutil==2.9.0.post0
67
+ python-dotenv==1.2.1
68
+ python-multipart==0.0.21
69
+ pytz==2025.2
70
+ PyYAML==6.0.3
71
+ referencing==0.37.0
72
+ regex==2026.1.15
73
+ requests==2.32.5
74
+ rich==14.2.0
75
+ rpds-py==0.30.0
76
+ safehttpx==0.1.7
77
+ semantic-version==2.10.0
78
+ setuptools==80.10.1
79
+ shellingham==1.5.4
80
+ six==1.17.0
81
+ sse-starlette==3.2.0
82
+ starlette==0.50.0
83
+ sympy==1.14.0
84
+ tiktoken==0.12.0
85
+ tokenizers==0.22.2
86
+ tomlkit==0.13.3
87
+ torch==2.10.0
88
+ tqdm==4.67.1
89
+ typer==0.21.1
90
+ typer-slim==0.21.1
91
+ typing-inspection==0.4.2
92
+ typing_extensions==4.15.0
93
+ tzdata==2025.3
94
+ urllib3==2.6.3
95
+ uvicorn==0.40.0
96
+ Werkzeug==3.1.5