AndreySokolov01 committed on
Commit
23f1f1e
·
verified ·
1 Parent(s): 9334e1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -45
app.py CHANGED
@@ -25,7 +25,7 @@ PRESETS = {
25
  "Viral Shorts (TOP)": {
26
  "Alignment": 8,
27
  "FontName": "Arial Black",
28
- "FontSize": 12,
29
  "PrimaryColour": "#FFFF00",
30
  "Outline": 4,
31
  "OutlineColour": "#000000",
@@ -38,7 +38,7 @@ PRESETS = {
38
  "Минимал низ": {
39
  "Alignment": 2,
40
  "FontName": "Montserrat",
41
- "FontSize": 12,
42
  "PrimaryColour": "#17FC03",
43
  "Outline": 1,
44
  "OutlineColour": "#000000",
@@ -82,24 +82,44 @@ def format_subtitle_text(text, font_size, bold):
82
  text = text.upper()
83
  else:
84
  text = text.capitalize()
85
- wrap_width = 18 if font_size >= 48 else 36
 
 
 
 
 
 
 
 
 
 
86
  return "\n".join(wrap(text, wrap_width))
87
 
88
- # === Модели (без изменений) ===
89
  def get_whisper():
90
- if "whisper" in _cache: return _cache["whisper"]
 
91
  device = "cuda" if torch.cuda.is_available() else "cpu"
92
  dtype = torch.float16 if device == "cuda" else torch.float32
93
  processor = WhisperProcessor.from_pretrained(WHISPER_MODEL)
94
- model = WhisperForConditionalGeneration.from_pretrained(WHISPER_MODEL, torch_dtype=dtype, low_cpu_mem_usage=True)
95
- pipe = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer,
96
- feature_extractor=processor.feature_extractor, return_timestamps=True,
97
- chunk_length_s=CHUNK_LENGTH, device=0 if device == "cuda" else -1)
 
 
 
 
 
 
 
 
98
  _cache["whisper"] = pipe
99
  return pipe
100
 
101
  def get_faster_whisper():
102
- if "faster" in _cache: return _cache["faster"]
 
103
  device = "cuda" if torch.cuda.is_available() else "cpu"
104
  compute = "float16" if device == "cuda" else "int8"
105
  model = FasterWhisperModel(FASTERW_MODEL, device=device, compute_type=compute)
@@ -107,13 +127,15 @@ def get_faster_whisper():
107
  return model
108
 
109
  def get_parakeet():
110
- if "parakeet" in _cache: return _cache["parakeet"]
 
111
  model = EncDecRNNTBPEModel.from_pretrained(PARAKEET_MODEL)
112
  model.eval()
113
  model = model.to("cuda" if torch.cuda.is_available() else "cpu")
114
  _cache["parakeet"] = model
115
  return model
116
 
 
117
  def transcribe(audio, backend):
118
  if backend == "Whisper":
119
  pipe = get_whisper()
@@ -130,11 +152,15 @@ def transcribe(audio, backend):
130
  step = 6
131
  for i in range(0, len(out), step):
132
  g = out[i:i+step]
133
- chunks.append({"start": g[0]["start"], "end": g[-1]["end"], "text": " ".join(w["word"] for w in g)})
 
 
 
 
134
  return chunks
135
 
136
- # === НОВАЯ ФУНКЦИЯ: preview первого кадра с субтитрами ===
137
- def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, margin):
138
  if not video_path:
139
  return None
140
 
@@ -143,7 +169,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
143
  ass_file = os.path.join(tmp, "preview.ass")
144
 
145
  try:
146
- # 1. Извлекаем первый кадр
147
  subprocess.run([
148
  'ffmpeg', '-y', '-i', video_path, '-vframes', '1', '-q:v', '2', frame
149
  ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
@@ -151,20 +177,11 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
151
  if not os.path.exists(frame):
152
  return None
153
 
154
- # 2. Извлекаем аудио и транскрибируем (минимально)
155
- wav = os.path.join(tmp, "audio.wav")
156
- subprocess.run([
157
- 'ffmpeg', '-y', '-i', video_path, '-vn', '-ac', '1', '-ar', '16000', wav
158
- ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
159
-
160
- segs = transcribe(wav, backend)
161
- if not segs:
162
- return None
163
-
164
- first_seg = segs[0]
165
- styled_text = format_subtitle_text(first_seg["text"], size, bold)
166
 
167
- # 3. Генерируем .ass стиль
168
  style = {
169
  "FontName": font,
170
  "FontSize": int(size),
@@ -172,7 +189,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
172
  "BackColour": bg,
173
  "Bold": int(bold),
174
  "MarginV": int(margin),
175
- "Alignment": 2, # bottom center — можно менять
176
  "Outline": 1,
177
  "OutlineColour": "&H00000000",
178
  "BorderStyle": 1,
@@ -180,7 +197,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
180
  }
181
  style_str = style_to_force(style)
182
 
183
- # 4. Создаём .ass файл
184
  with open(ass_file, "w", encoding="utf-8") as f:
185
  f.write("[Script Info]\n")
186
  f.write("ScriptType: v4.00+\n")
@@ -191,10 +208,9 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
191
  f.write(f"Style: Default,{style_str}\n\n")
192
  f.write("[Events]\n")
193
  f.write("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")
194
- # Используем длительность 5 секунд для preview
195
  f.write(f"Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{styled_text}\n")
196
 
197
- # 5. Накладываем субтитры на кадр
198
  preview_img = os.path.join(tmp, "preview.jpg")
199
  safe_ass = ass_file.replace("\\", "/").replace(":", "\\:")
200
  subprocess.run([
@@ -210,7 +226,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
210
 
211
  return None
212
 
213
- # === Основная обработка (без изменений, кроме импорта) ===
214
  def process(video, backend, preset, font, size, color, bg, bold, margin):
215
  if not video:
216
  return "❌ Нет видео", None, None, "", None
@@ -260,9 +276,9 @@ def process(video, backend, preset, font, size, color, bg, bold, margin):
260
  except Exception as e:
261
  return f"❌ Ошибка: {str(e)}", None, None, "", None
262
 
263
- # === Интерфейс ===
264
  with gr.Blocks() as demo:
265
- gr.Markdown("## 🎬 Автосубтитры + LIVE preview текста + Превью стиля")
266
 
267
  with gr.Row():
268
  with gr.Column():
@@ -272,34 +288,32 @@ with gr.Blocks() as demo:
272
 
273
  gr.Markdown("### 🎨 Ручная настройка")
274
  font = gr.Textbox("Montserrat", label="Шрифт")
275
- size = gr.Slider(5, 72, 32, label="Размер")
276
  color = gr.ColorPicker("#FFFFFF", label="Цвет текста")
277
  bg = gr.ColorPicker("#80000000", label="Фон")
278
  bold = gr.Checkbox(True, label="Bold")
279
- margin = gr.Slider(10, 100, 40, label="Отступ")
280
 
281
  with gr.Row():
282
- run_btn = gr.Button("🚀 Сгенерировать")
283
  preview_btn = gr.Button("👁️ Превью стиля")
284
 
285
  with gr.Column():
286
  status = gr.Markdown()
287
- preview = gr.Textbox(label="LIVE preview текста", lines=8)
288
- preview_img = gr.Image(label="Превью субтитров на кадре", type="filepath")
289
- out_video = gr.Video()
290
- out_srt = gr.File()
291
 
292
- # Обработка полного видео
293
  run_btn.click(
294
  process,
295
  inputs=[video, backend, preset, font, size, color, bg, bold, margin],
296
  outputs=[status, out_video, out_srt, preview, preview_img]
297
  )
298
 
299
- # Превью стиля (быстро!)
300
  preview_btn.click(
301
  preview_subtitle_style,
302
- inputs=[video, backend, font, size, color, bg, bold, margin],
303
  outputs=[preview_img]
304
  )
305
 
 
25
  "Viral Shorts (TOP)": {
26
  "Alignment": 8,
27
  "FontName": "Arial Black",
28
+ "FontSize": 64,
29
  "PrimaryColour": "#FFFF00",
30
  "Outline": 4,
31
  "OutlineColour": "#000000",
 
38
  "Минимал низ": {
39
  "Alignment": 2,
40
  "FontName": "Montserrat",
41
+ "FontSize": 28,
42
  "PrimaryColour": "#17FC03",
43
  "Outline": 1,
44
  "OutlineColour": "#000000",
 
82
  text = text.upper()
83
  else:
84
  text = text.capitalize()
85
+
86
+ # Адаптивная ширина переноса
87
+ if font_size >= 60:
88
+ wrap_width = 12
89
+ elif font_size >= 48:
90
+ wrap_width = 16
91
+ elif font_size >= 36:
92
+ wrap_width = 24
93
+ else:
94
+ wrap_width = 36
95
+
96
  return "\n".join(wrap(text, wrap_width))
97
 
98
+ # === Загрузка моделей ===
99
  def get_whisper():
100
+ if "whisper" in _cache:
101
+ return _cache["whisper"]
102
  device = "cuda" if torch.cuda.is_available() else "cpu"
103
  dtype = torch.float16 if device == "cuda" else torch.float32
104
  processor = WhisperProcessor.from_pretrained(WHISPER_MODEL)
105
+ model = WhisperForConditionalGeneration.from_pretrained(
106
+ WHISPER_MODEL, torch_dtype=dtype, low_cpu_mem_usage=True
107
+ )
108
+ pipe = pipeline(
109
+ "automatic-speech-recognition",
110
+ model=model,
111
+ tokenizer=processor.tokenizer,
112
+ feature_extractor=processor.feature_extractor,
113
+ return_timestamps=True,
114
+ chunk_length_s=CHUNK_LENGTH,
115
+ device=0 if device == "cuda" else -1,
116
+ )
117
  _cache["whisper"] = pipe
118
  return pipe
119
 
120
  def get_faster_whisper():
121
+ if "faster" in _cache:
122
+ return _cache["faster"]
123
  device = "cuda" if torch.cuda.is_available() else "cpu"
124
  compute = "float16" if device == "cuda" else "int8"
125
  model = FasterWhisperModel(FASTERW_MODEL, device=device, compute_type=compute)
 
127
  return model
128
 
129
  def get_parakeet():
130
+ if "parakeet" in _cache:
131
+ return _cache["parakeet"]
132
  model = EncDecRNNTBPEModel.from_pretrained(PARAKEET_MODEL)
133
  model.eval()
134
  model = model.to("cuda" if torch.cuda.is_available() else "cpu")
135
  _cache["parakeet"] = model
136
  return model
137
 
138
+ # === Транскрибация ===
139
  def transcribe(audio, backend):
140
  if backend == "Whisper":
141
  pipe = get_whisper()
 
152
  step = 6
153
  for i in range(0, len(out), step):
154
  g = out[i:i+step]
155
+ chunks.append({
156
+ "start": g[0]["start"],
157
+ "end": g[-1]["end"],
158
+ "text": " ".join(w["word"] for w in g)
159
+ })
160
  return chunks
161
 
162
+ # === Превью стиля (без транскрибации!) ===
163
+ def preview_subtitle_style(video_path, font, size, color, bg, bold, margin):
164
  if not video_path:
165
  return None
166
 
 
169
  ass_file = os.path.join(tmp, "preview.ass")
170
 
171
  try:
172
+ # Извлекаем первый кадр
173
  subprocess.run([
174
  'ffmpeg', '-y', '-i', video_path, '-vframes', '1', '-q:v', '2', frame
175
  ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
177
  if not os.path.exists(frame):
178
  return None
179
 
180
+ # Фиксированный текст для превью
181
+ example_text = "Тут ваши субтитры"
182
+ styled_text = format_subtitle_text(example_text, size, bold)
 
 
 
 
 
 
 
 
 
183
 
184
+ # Стиль
185
  style = {
186
  "FontName": font,
187
  "FontSize": int(size),
 
189
  "BackColour": bg,
190
  "Bold": int(bold),
191
  "MarginV": int(margin),
192
+ "Alignment": 2,
193
  "Outline": 1,
194
  "OutlineColour": "&H00000000",
195
  "BorderStyle": 1,
 
197
  }
198
  style_str = style_to_force(style)
199
 
200
+ # Создаём .ass
201
  with open(ass_file, "w", encoding="utf-8") as f:
202
  f.write("[Script Info]\n")
203
  f.write("ScriptType: v4.00+\n")
 
208
  f.write(f"Style: Default,{style_str}\n\n")
209
  f.write("[Events]\n")
210
  f.write("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")
 
211
  f.write(f"Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{styled_text}\n")
212
 
213
+ # Накладываем на кадр
214
  preview_img = os.path.join(tmp, "preview.jpg")
215
  safe_ass = ass_file.replace("\\", "/").replace(":", "\\:")
216
  subprocess.run([
 
226
 
227
  return None
228
 
229
+ # === Полная обработка видео ===
230
  def process(video, backend, preset, font, size, color, bg, bold, margin):
231
  if not video:
232
  return "❌ Нет видео", None, None, "", None
 
276
  except Exception as e:
277
  return f"❌ Ошибка: {str(e)}", None, None, "", None
278
 
279
+ # === Интерфейс Gradio ===
280
  with gr.Blocks() as demo:
281
+ gr.Markdown("## 🎬 Автосубтитры (Whisper / FasterWhisper / NeMo) + LIVE preview + Превью стиля")
282
 
283
  with gr.Row():
284
  with gr.Column():
 
288
 
289
  gr.Markdown("### 🎨 Ручная настройка")
290
  font = gr.Textbox("Montserrat", label="Шрифт")
291
+ size = gr.Slider(minimum=10, maximum=96, value=32, step=1, label="Размер шрифта")
292
  color = gr.ColorPicker("#FFFFFF", label="Цвет текста")
293
  bg = gr.ColorPicker("#80000000", label="Фон")
294
  bold = gr.Checkbox(True, label="Bold")
295
+ margin = gr.Slider(10, 100, 40, label="Отступ снизу")
296
 
297
  with gr.Row():
298
+ run_btn = gr.Button("🚀 Сгенерировать субтитры")
299
  preview_btn = gr.Button("👁️ Превью стиля")
300
 
301
  with gr.Column():
302
  status = gr.Markdown()
303
+ preview = gr.Textbox(label="LIVE preview текста субтитров", lines=8)
304
+ preview_img = gr.Image(label="Превью стиля на кадре", type="filepath")
305
+ out_video = gr.Video(label="Видео с субтитрами")
306
+ out_srt = gr.File(label="SRT файл")
307
 
 
308
  run_btn.click(
309
  process,
310
  inputs=[video, backend, preset, font, size, color, bg, bold, margin],
311
  outputs=[status, out_video, out_srt, preview, preview_img]
312
  )
313
 
 
314
  preview_btn.click(
315
  preview_subtitle_style,
316
+ inputs=[video, font, size, color, bg, bold, margin],
317
  outputs=[preview_img]
318
  )
319