eubottura committed on
Commit
c2c1bfd
verified
1 Parent(s): c23b652

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +389 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import re
4
+ from collections import Counter
5
+ from datetime import timedelta
6
+ from typing import List, Dict, Any, Optional, Tuple
7
+
8
# Language-specific rules and dictionaries, keyed by ISO 639-1 language code.
# Each entry provides:
#   trigger_words       - connectives that should_break_line treats as break hints
#   forbidden_endings   - words should_break_line refuses to leave at the end of a line
#   sentence_boundaries - punctuation marks that terminate a sentence
LANGUAGE_RULES = {
    "en": {
        "trigger_words": ["however", "but", "therefore", "meanwhile", "nevertheless"],
        "forbidden_endings": [
            "a", "an", "the", "and", "but", "or", "for", "nor",
            "on", "at", "to", "from", "by", "of", "in", "with",
        ],
        "sentence_boundaries": [".", "?", "!"],
    },
    "es": {
        "trigger_words": ["sin embargo", "pero", "por lo tanto", "mientras tanto", "no obstante"],
        # Duplicate entries ("de", "por") removed; membership tests are unaffected.
        "forbidden_endings": [
            "el", "la", "los", "las", "y", "o",
            "para", "por", "de", "en", "con", "a",
        ],
        "sentence_boundaries": [".", "?", "!"],
    },
    "fr": {
        "trigger_words": ["cependant", "mais", "donc", "pendant ce temps", "néanmoins"],
        # Duplicate entries ("de", "par") removed; membership tests are unaffected.
        "forbidden_endings": [
            "le", "la", "les", "et", "ou",
            "pour", "par", "de", "en", "avec", "à",
        ],
        "sentence_boundaries": [".", "?", "!"],
    },
}
26
+
27
def validate_input(json_input: str) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Validate the input JSON structure.

    The input is accepted when it parses to a JSON object that has a "text"
    key and a "chunks" key holding a non-empty list. The contents of the
    individual chunks are not inspected here.

    Args:
        json_input: JSON string to validate

    Returns:
        Tuple of (is_valid, parsed_data) where parsed_data is None if invalid
    """
    try:
        data = json.loads(json_input)
    except (TypeError, ValueError):
        # TypeError: input is not str/bytes (e.g. None).
        # ValueError: covers json.JSONDecodeError for malformed or empty text.
        return False, None

    if not isinstance(data, dict):
        return False, None
    if "text" not in data or "chunks" not in data:
        return False, None

    chunks = data["chunks"]
    if not isinstance(chunks, list) or not chunks:
        return False, None
    return True, data
48
+
49
def format_time(seconds: float) -> str:
    """
    Convert seconds to SRT time format (HH:MM:SS,mmm).

    Sub-millisecond precision is truncated. Durations of 24 hours or more
    keep counting in the hours field instead of wrapping around, and
    negative inputs are clamped to zero because SRT cannot express them.

    Args:
        seconds: Time in seconds

    Returns:
        Formatted time string
    """
    td = timedelta(seconds=max(seconds, 0))
    # Work from the whole duration in milliseconds so the days component of
    # the timedelta is included in the hour count.
    total_ms = td // timedelta(milliseconds=1)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
64
+
65
def count_words(text: str) -> int:
    """
    Count the whitespace-separated tokens in a piece of text.

    Punctuation attached to a word stays part of that word; runs of
    whitespace separate tokens and are never counted themselves.

    Args:
        text: Text to count words in

    Returns:
        Word count
    """
    tokens = text.split()
    return len(tokens)
76
+
77
def get_majority_speaker(chunks: List[Dict[str, Any]]) -> Optional[str]:
    """
    Find the speaker who contributes the most words.

    Chunks without a "speaker" key are ignored. Each remaining chunk adds
    the number of whitespace-separated tokens in its "text" to that
    speaker's tally.

    Args:
        chunks: List of chunk dictionaries

    Returns:
        Majority speaker ID or None if no speaker info
    """
    tally = Counter()
    for entry in chunks:
        if "speaker" not in entry:
            continue
        tally[entry["speaker"]] += len(entry["text"].split())

    if not tally:
        return None

    top_speaker, _word_total = tally.most_common(1)[0]
    return top_speaker
94
+
95
def should_break_line(
    line: str,
    language: str,
    word_break_threshold: int,
    max_chars: int = 11
) -> bool:
    """
    Determine if a line should break based on language rules.

    The checks run in order and the first one that matches wins:
    word count, non-whitespace character count, presence of a trigger
    word, and finally whether the last word is a forbidden line ending.
    Unknown language codes fall back to the English rules.

    Args:
        line: Text line to check
        language: ISO language code
        word_break_threshold: Maximum words per line
        max_chars: Maximum characters per line, not counting whitespace
            (defaults to 11, the limit this function has always used)

    Returns:
        True if line should break
    """
    # Check word count threshold
    if count_words(line) > word_break_threshold:
        return True

    # Check character limit (whitespace excluded)
    chars_excluding_spaces = len(re.sub(r'\s+', '', line))
    if chars_excluding_spaces > max_chars:
        return True

    # Check for trigger words. Match on word boundaries so that a trigger
    # such as "but" does not fire inside "button" or "butter".
    rules = LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"])
    lowered = line.lower()
    for trigger in rules["trigger_words"]:
        if re.search(rf"\b{re.escape(trigger.lower())}\b", lowered):
            return True

    # Check for forbidden endings
    words = line.split()
    last_word = words[-1].lower() if words else ""
    return last_word in rules["forbidden_endings"]
128
+
129
def format_speaker_change(speaker_id: str) -> str:
    """
    Build the speaker prefix placed in front of subtitle text.

    Args:
        speaker_id: Speaker identifier

    Returns:
        The identifier wrapped in square brackets, followed by one space
    """
    return "[{}] ".format(speaker_id)
140
+
141
def process_chunks_to_srt(
    chunks: List[Dict[str, Any]],
    word_break_threshold: int,
    language: str,
    include_speaker: bool
) -> str:
    """
    Convert transcription chunks to SRT format.

    Consecutive chunks are merged into one subtitle block until one of
    these happens, at which point a new block is started:

    * the text accumulated so far ends with a sentence boundary of the
      selected language (unknown languages use the English rules),
    * adding the next chunk would exceed ``word_break_threshold`` words,
    * ``include_speaker`` is set and the next chunk names a different speaker.

    Each chunk must provide "text" and a two-element "timestamp"
    (start, end) in seconds; "speaker" is optional.

    Args:
        chunks: List of chunk dictionaries
        word_break_threshold: Maximum words per subtitle block
        language: ISO language code
        include_speaker: Whether to include speaker information

    Returns:
        SRT formatted string
    """
    rules = LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"])
    boundaries = tuple(rules["sentence_boundaries"])

    srt_segments = []
    current = None  # the subtitle block currently being accumulated

    for chunk in chunks:
        text = chunk["text"]
        start_time = chunk["timestamp"][0]
        end_time = chunk["timestamp"][1]
        if end_time is None:
            # NOTE(review): assumes an upstream transcriber may leave the end
            # of a chunk open (null) - confirm. Fall back to the start time
            # so the block can still be formatted.
            end_time = start_time
        speaker = chunk.get("speaker")
        words = count_words(text)

        if current is not None:
            # A sentence boundary is detected at the END of what has been
            # collected so far, so the finished sentence closes its block.
            sentence_ended = current["parts"][-1].rstrip().endswith(boundaries)
            too_long = current["words"] + words > word_break_threshold
            speaker_changed = (
                include_speaker
                and "speaker" in chunk
                and speaker != current["speaker"]
            )
            if sentence_ended or too_long or speaker_changed:
                srt_segments.append(current)
                current = None

        if current is None:
            # Start new segment
            current = {
                "start": start_time,
                "end": end_time,
                "parts": [text],
                "words": words,
                "speaker": speaker,
            }
        else:
            # Continue current segment
            current["parts"].append(text)
            current["words"] += words
            current["end"] = end_time

    # Add final segment
    if current is not None:
        srt_segments.append(current)

    # Format segments as SRT
    srt_lines = []
    for index, segment in enumerate(srt_segments, 1):
        # Collapse whitespace runs: chunk texts may carry their own leading
        # spaces, and an embedded blank line would terminate the SRT block.
        text = " ".join(" ".join(segment["parts"]).split())

        # Apply speaker marker if needed
        if include_speaker and segment["speaker"]:
            text = format_speaker_change(segment["speaker"]) + text

        srt_lines.append(str(index))
        srt_lines.append(
            f"{format_time(segment['start'])} --> {format_time(segment['end'])}"
        )
        srt_lines.append(text)
        srt_lines.append("")  # Blank line between segments

    return "\n".join(srt_lines).strip()
243
+
244
def convert_transcription(
    json_input: str,
    word_break_threshold: int,
    language: str,
    include_speaker: bool
) -> Tuple[str, str]:
    """
    Turn a Transcribe JSON string into SRT text plus a status line.

    The input is validated first; if that fails, or if the conversion
    itself raises, the SRT part of the result is an empty string and the
    status message describes the problem.

    Args:
        json_input: JSON input string
        word_break_threshold: Maximum words per subtitle block
        language: ISO language code
        include_speaker: Whether to include speaker information

    Returns:
        Tuple of (srt_output, status_message)
    """
    parsed_ok, payload = validate_input(json_input)
    if not parsed_ok:
        return "", "Invalid JSON input: Missing required 'text' or 'chunks' fields"

    # Any failure while building the subtitles is reported to the caller
    # as a status message rather than propagated.
    try:
        rendered = process_chunks_to_srt(
            payload["chunks"],
            word_break_threshold,
            language,
            include_speaker,
        )
    except Exception as exc:
        return "", f"Error during conversion: {exc!s}"

    return rendered, "Conversion successful"
278
+
279
# Create Gradio interface: inputs and parameters in the left column,
# SRT output and status in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# Transcription Format Converter")
    gr.Markdown("Convert Transcribe JSON format to SRT subtitle format with configurable options")
    gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")

    with gr.Row():
        with gr.Column():
            # Input section
            json_input = gr.Textbox(
                label="Transcribe JSON Input",
                placeholder='{"text": "Full text", "chunks": [{"text": "Segment 1", "timestamp": [0, 2.5]}, ...]}',
                lines=10
            )

            # Parameters
            word_break_threshold = gr.Slider(
                minimum=5,
                maximum=20,
                value=10,
                step=1,
                label="Word Break Threshold"
            )

            language = gr.Dropdown(
                choices=["en", "es", "fr"],
                value="en",
                label="Language"
            )

            include_speaker = gr.Checkbox(
                label="Include Speaker Information",
                value=False
            )

            convert_btn = gr.Button("Convert to SRT", variant="primary")

        with gr.Column():
            # Output section
            srt_output = gr.Textbox(
                label="SRT Output",
                lines=15,
                placeholder="SRT formatted subtitles will appear here..."
            )

            status_message = gr.Textbox(
                label="Status",
                interactive=False
            )

    # Examples: each row supplies (JSON input, word threshold, language,
    # include-speaker flag) in the same order as `inputs` below.
    examples = gr.Examples(
        examples=[
            [
                '{"text": "Hello world. This is a test. How are you today?", "chunks": [{"text": "Hello world.", "timestamp": [0, 1.5]}, {"text": "This is a test.", "timestamp": [1.5, 3.2]}, {"text": "How are you today?", "timestamp": [3.2, 5.0]}]}',
                10,
                "en",
                False
            ],
            [
                '{"text": "Hola mundo. Esto es una prueba. ¿Cómo estás hoy?", "chunks": [{"text": "Hola mundo.", "timestamp": [0, 1.5]}, {"text": "Esto es una prueba.", "timestamp": [1.5, 3.2]}, {"text": "¿Cómo estás hoy?", "timestamp": [3.2, 5.0]}]}',
                10,
                "es",
                False
            ]
        ],
        inputs=[json_input, word_break_threshold, language, include_speaker],
        outputs=[srt_output, status_message],
        fn=convert_transcription,
        cache_examples=True,
        label="Examples"
    )

    # Event listener: the button runs the same conversion as the examples.
    convert_btn.click(
        fn=convert_transcription,
        inputs=[json_input, word_break_threshold, language, include_speaker],
        outputs=[srt_output, status_message],
        api_visibility="public"
    )
359
+
360
# Launch with modern theme and styling. The theme, footer links and CSS are
# assembled first so the launch call itself stays short.
_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md"
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)

_footer_links = [
    {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
    {"label": "Gradio Docs", "url": "https://www.gradio.app/docs"},
    {"label": "GitHub", "url": "https://github.com/gradio-app/gradio"}
]

# Page-level CSS: centre the app with a maximum width and round the boxes.
_css = """
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
}
.gr-box {
border-radius: 8px !important;
}
"""

demo.launch(theme=_theme, footer_links=_footer_links, css=_css)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=6.0
2
+ requests
3
+ Pillow