jakgritb committed on
Commit
c511df7
·
verified ·
1 Parent(s): e896716

fix: add per-segment HRE edit plans

Browse files
README.md CHANGED
@@ -96,11 +96,11 @@ where:
96
  │ │
97
  │ Normal Mode HRE (High-Retention Editing) │
98
  │ ───────────── ────────────────────────────── │
99
- │ • pysubs2 ASS • Silence removal (ffmpeg)
100
- │ • User style config • Auto-zoom to face (zoompan)
101
- │ • Font/color/animation • Jump cuts at boundaries
102
- │ • Karaoke/pop/fade • Qwen2.5-VL emoji selection
103
- │ • AMD AMF encode • Impact bold captions
104
  └──────────────────────────────────────────────────────────────────┘
105
 
106
 
@@ -139,11 +139,12 @@ Full creative control over:
139
 
140
  ### High-Retention Editing (HRE)
141
  AI chooses everything:
142
- - Silence removal (`ffmpeg silenceremove`)
143
- - Auto-zoom to face region (`ffmpeg zoompan` using Qwen2.5-VL face_bbox)
144
- - Jump cuts at scene boundaries
 
 
145
  - Qwen2.5-VL selects contextually-appropriate emoji overlay
146
- - Impact 64px bold white captions, word-by-word, pop animation
147
 
148
  ---
149
 
 
96
  │ │
97
  │ Normal Mode HRE (High-Retention Editing) │
98
  │ ───────────── ────────────────────────────── │
99
+ │ • pysubs2 ASS • Per-segment AI edit plan
100
+ │ • User style config • Auto-zoom per segment (zoompan)
101
+ │ • Font/color/animation • Word / phrase / sentence captions
102
+ │ • Karaoke/pop/fade • Top / bottom / left / right captions
103
+ │ • AMD AMF encode • Qwen2.5-VL emoji selection
104
  └──────────────────────────────────────────────────────────────────┘
105
 
106
 
 
139
 
140
  ### High-Retention Editing (HRE)
141
  AI chooses everything:
142
+ - A per-segment edit plan with timestamps
143
+ - Auto-zoom direction and speed per segment (`ffmpeg zoompan`)
144
+ - Caption mode per segment: word, phrase, or sentence
145
+ - Caption placement per segment: top, bottom, left, right, or center
146
+ - Caption color, size, and pop emphasis based on segment energy
147
  - Qwen2.5-VL selects contextually-appropriate emoji overlay
 
148
 
149
  ---
150
 
backend/src/analysis/vision.py CHANGED
@@ -172,7 +172,9 @@ Respond ONLY with valid JSON — no markdown:
172
  "face_detected": <true|false>,
173
  "face_cx": <0.0-1.0>,
174
  "face_cy": <0.0-1.0>,
175
- "subtitle_position": "<top|bottom>",
 
 
176
  "subtitle_color": "<white|yellow|cyan|orange|green>",
177
  "energy_level": "<high|medium|low>",
178
  "moment_type": "<hook|punchline|context|reaction|transition>"
@@ -184,8 +186,13 @@ Rules:
184
  - zoom IN slow: context, buildup, moderate energy
185
  - zoom OUT: reveals, breathing room after intensity
186
  - HOLD: stable content, text-heavy moments
187
- - subtitle TOP: face is in bottom half → put text at top
188
- - subtitle BOTTOM: face is in top half → put text at bottom
 
 
 
 
 
189
  - face_cx/face_cy: face center as 0.0-1.0 fraction of frame
190
  """
191
 
@@ -196,7 +203,7 @@ def analyze_frame_for_hre(
196
  seg_idx: int = 0,
197
  n_total: int = 1,
198
  ) -> dict:
199
- """Per-segment HRE: zoom direction, subtitle position+color for this moment."""
200
  try:
201
  from openai import OpenAI
202
 
@@ -227,11 +234,12 @@ def analyze_frame_for_hre(
227
  if raw.startswith("json"):
228
  raw = raw[4:]
229
 
230
- analysis = json.loads(raw.strip())
231
  logger.debug(
232
  f"HRE seg {seg_idx}/{n_total}: "
233
  f"zoom={analysis.get('zoom_direction')}({analysis.get('zoom_speed')}) "
234
- f"sub={analysis.get('subtitle_position')}/{analysis.get('subtitle_color')} "
 
235
  f"type={analysis.get('moment_type')}"
236
  )
237
  try:
@@ -257,8 +265,10 @@ def _default_hre_analysis(seg_idx: int = 0, n_total: int = 1) -> dict:
257
  else:
258
  zoom_dir, zoom_speed, moment = "in", "slow", "reaction"
259
 
260
- _colors = ["yellow", "white", "cyan", "orange", "white", "yellow"]
261
- _positions = ["bottom", "top", "bottom", "top", "bottom", "top"]
 
 
262
 
263
  return {
264
  "zoom_direction": zoom_dir,
@@ -267,6 +277,8 @@ def _default_hre_analysis(seg_idx: int = 0, n_total: int = 1) -> dict:
267
  "face_cx": 0.5,
268
  "face_cy": 0.38,
269
  "subtitle_position": _positions[seg_idx % len(_positions)],
 
 
270
  "subtitle_color": _colors[seg_idx % len(_colors)],
271
  "energy_level": "medium",
272
  "moment_type": moment,
 
172
  "face_detected": <true|false>,
173
  "face_cx": <0.0-1.0>,
174
  "face_cy": <0.0-1.0>,
175
+ "subtitle_position": "<top|bottom|left|right|center>",
176
+ "subtitle_mode": "<word|phrase|sentence>",
177
+ "subtitle_emphasis": "<pop|punch|calm>",
178
  "subtitle_color": "<white|yellow|cyan|orange|green>",
179
  "energy_level": "<high|medium|low>",
180
  "moment_type": "<hook|punchline|context|reaction|transition>"
 
186
  - zoom IN slow: context, buildup, moderate energy
187
  - zoom OUT: reveals, breathing room after intensity
188
  - HOLD: stable content, text-heavy moments
189
+ - subtitle WORD: short hooks, reactions, punchlines, important keywords
190
+ - subtitle PHRASE: fast but understandable speech, 2-4 words at a time
191
+ - subtitle SENTENCE: explanation, normal conversation, low/medium energy
192
+ - subtitle TOP: face is in bottom half
193
+ - subtitle BOTTOM: face is in top half
194
+ - subtitle LEFT/RIGHT: face or main object is on the opposite side
195
+ - Avoid choosing the exact same subtitle_position and subtitle_mode for every segment.
196
  - face_cx/face_cy: face center as 0.0-1.0 fraction of frame
197
  """
198
 
 
203
  seg_idx: int = 0,
204
  n_total: int = 1,
205
  ) -> dict:
206
+ """Per-segment HRE: zoom, caption placement, caption mode, and color."""
207
  try:
208
  from openai import OpenAI
209
 
 
234
  if raw.startswith("json"):
235
  raw = raw[4:]
236
 
237
+ analysis = {**_default_hre_analysis(seg_idx, n_total), **json.loads(raw.strip())}
238
  logger.debug(
239
  f"HRE seg {seg_idx}/{n_total}: "
240
  f"zoom={analysis.get('zoom_direction')}({analysis.get('zoom_speed')}) "
241
+ f"sub={analysis.get('subtitle_position')}/{analysis.get('subtitle_mode')}/"
242
+ f"{analysis.get('subtitle_color')} "
243
  f"type={analysis.get('moment_type')}"
244
  )
245
  try:
 
265
  else:
266
  zoom_dir, zoom_speed, moment = "in", "slow", "reaction"
267
 
268
+ _colors = ["yellow", "white", "cyan", "orange", "white", "yellow"]
269
+ _positions = ["bottom", "top", "left", "bottom", "right", "top"]
270
+ _modes = ["word", "sentence", "phrase", "word", "sentence", "phrase"]
271
+ _emphasis = ["punch", "calm", "pop", "punch", "calm", "pop"]
272
 
273
  return {
274
  "zoom_direction": zoom_dir,
 
277
  "face_cx": 0.5,
278
  "face_cy": 0.38,
279
  "subtitle_position": _positions[seg_idx % len(_positions)],
280
+ "subtitle_mode": _modes[seg_idx % len(_modes)],
281
+ "subtitle_emphasis": _emphasis[seg_idx % len(_emphasis)],
282
  "subtitle_color": _colors[seg_idx % len(_colors)],
283
  "energy_level": "medium",
284
  "moment_type": moment,
backend/src/processing/high_retention.py CHANGED
@@ -1,15 +1,17 @@
1
  """High-Retention Editing pipeline — per-segment AI decisions.
2
 
3
  Each 3-5s segment gets its own zoom direction, subtitle position,
4
- and caption color driven by Qwen2.5-VL analyzing one frame per segment.
 
5
 
6
  Pipeline per clip:
7
  1. Segment clip at speech pauses (3-5s chunks)
8
  2. Extract midpoint frame from each segment
9
  3. Qwen2.5-VL analyzes each frame → zoom + subtitle decisions
10
  4. ffmpeg filter_complex: per-segment zoompan + concat
11
- 5. ASS subtitles with per-segment alignment/color/size override tags
12
  """
 
13
  import subprocess
14
  import tempfile
15
  from pathlib import Path
@@ -175,35 +177,25 @@ def _build_zoom_exprs(
175
 
176
  if direction == "in":
177
  if speed == "fast":
178
- z_expr, max_zoom = "min(1.2+n*0.0014\\,1.6)", 1.6
179
  else:
180
- z_expr, max_zoom = "min(1.05+n*0.0006\\,1.35)", 1.35
181
  elif direction == "out":
182
  if speed == "fast":
183
- z_expr, max_zoom = "max(1.6-n*0.0016\\,1.0)", 1.6
184
  else:
185
- z_expr, max_zoom = "max(1.4-n*0.0010\\,1.0)", 1.4
186
  else: # hold
187
- z_expr, max_zoom = "1.1", 1.1
188
 
189
  if face_detected and direction == "in" and max_zoom > 1.05:
190
- raw_cx = int(face_cx * w - w / (max_zoom * 2))
191
- raw_cy = int(face_cy * h - h / (max_zoom * 2))
192
- safe_cx = max(0, min(w - int(w / max_zoom), raw_cx))
193
- safe_cy = max(0, min(h - int(h / max_zoom), raw_cy))
194
- ctr_x = w / 2 - w / (max_zoom * 2)
195
- ctr_y = h / 2 - h / (max_zoom * 2)
196
- x_expr = (
197
- f"(iw/2-(iw/zoom/2))+({safe_cx}-{ctr_x:.1f})*(zoom-1)/({max_zoom}-1)"
198
- )
199
- y_expr = (
200
- f"(ih/2-(ih/zoom/2))+({safe_cy}-{ctr_y:.1f})*(zoom-1)/({max_zoom}-1)"
201
- )
202
  else:
203
  x_expr = "iw/2-(iw/zoom/2)"
204
  if direction == "in":
205
  y_bias = min(face_cy, 0.5) if face_cy < 0.55 else 0.38
206
- y_expr = f"ih*{y_bias:.2f}-(ih/zoom/2)"
207
  else:
208
  y_expr = "ih/2-(ih/zoom/2)"
209
 
@@ -231,10 +223,12 @@ def _apply_per_segment_zoom(
231
  e = f"{seg['end']:.3f}"
232
  z, x, y = _build_zoom_exprs(analysis, w, h)
233
  zp = f"zoompan=z='{z}':x='{x}':y='{y}':d=1:s={w}x{h}:fps=30"
234
- filter_parts.append(f"[0:v]trim={s}:{e},setpts=PTS-STARTPTS,{zp}[v{i}]")
 
 
235
  v_labels.append(f"[v{i}]")
236
  if has_audio:
237
- filter_parts.append(f"[0:a]atrim={s}:{e},asetpts=PTS-STARTPTS[a{i}]")
238
  a_labels.append(f"[a{i}]")
239
 
240
  n = len(segments)
@@ -270,12 +264,377 @@ _ASS_COLORS = {
270
  "red": "&H000000FF",
271
  }
272
 
 
 
 
 
273
 
274
  def _ts(t: float) -> str:
275
- h = int(t // 3600)
276
- m = int((t % 3600) // 60)
277
- s = t % 60
278
- return f"{h}:{m:02d}:{s:06.3f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
 
281
  def _generate_per_segment_subtitles(
@@ -285,42 +644,14 @@ def _generate_per_segment_subtitles(
285
  segments: list[dict],
286
  analyses: list[dict],
287
  ) -> None:
288
- """Write ASS with per-segment alignment, color, and font-size overrides."""
289
- events: list[dict] = []
290
 
291
- # Word-level events
292
- for seg in transcript.get("segments", []):
293
- for w in seg.get("words", []):
294
- t0 = max(0.0, float(w.get("start", 0)) - clip_start)
295
- t1 = max(0.0, float(w.get("end", 0)) - clip_start)
296
- text = w.get("word", w.get("text", "")).strip()
297
- if text and t1 > 0:
298
- events.append({"start": t0, "end": max(t1, t0 + 0.08), "text": text})
299
-
300
- # Sentence-level fallback (split into 3-word chunks)
301
- if not events:
302
- for seg in transcript.get("segments", []):
303
- t0 = max(0.0, float(seg.get("start", 0)) - clip_start)
304
- t1 = max(0.0, float(seg.get("end", 0)) - clip_start)
305
- text = seg.get("text", "").strip()
306
- if not text or t1 <= 0:
307
- continue
308
- wlist = text.split()
309
- chunk = 3
310
- n_ch = max(1, (len(wlist) + chunk - 1) // chunk)
311
- dur = (t1 - t0) / n_ch
312
- for j in range(n_ch):
313
- events.append({
314
- "start": t0 + j * dur,
315
- "end": t0 + (j + 1) * dur,
316
- "text": " ".join(wlist[j * chunk:(j + 1) * chunk]),
317
- })
318
-
319
- def get_an(t: float) -> dict:
320
- for seg, an in zip(segments, analyses):
321
- if seg["start"] <= t < seg["end"]:
322
- return an
323
- return analyses[-1] if analyses else {}
324
 
325
  lines = [
326
  "[Script Info]",
@@ -334,36 +665,28 @@ def _generate_per_segment_subtitles(
334
  "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, "
335
  "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
336
  "Alignment, MarginL, MarginR, MarginV, Encoding",
337
- "Style: Default,Impact,90,&H00FFFFFF,&H0000FFFF,&H00000000,&H80000000,"
338
- "-1,0,0,0,100,100,0,0,1,4,0,2,40,40,200,1",
339
  "",
340
  "[Events]",
341
  "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
342
  ]
343
 
344
  for ev in events:
345
- an = get_an(ev["start"])
346
- color = _ASS_COLORS.get(an.get("subtitle_color", "white"), "&H00FFFFFF")
347
- pos = an.get("subtitle_position", "bottom")
348
- energy = an.get("energy_level", "medium")
349
- moment = an.get("moment_type", "context")
350
-
351
- alignment = 8 if pos == "top" else 2
352
- margin_v = 120 if pos == "top" else 200
353
- fs = (108 if energy == "high" or moment in ("hook", "punchline")
354
- else 80 if energy == "low" else 92)
355
-
356
- # Pop animation: start 130% scale, shrink to 100% in 120ms
357
- pop = "{\\fscx130\\fscy130\\t(0,120,\\fscx100\\fscy100)}"
358
- tag = f"{{\\an{alignment}\\1c{color}&\\fs{fs}\\b1}}{pop}"
359
 
360
  lines.append(
361
  f"Dialogue: 0,{_ts(ev['start'])},{_ts(ev['end'])},"
362
- f"Default,,0,0,{margin_v},,{tag}{ev['text'].upper()}"
363
  )
364
 
365
  ass_path.write_text("\n".join(lines), encoding="utf-8")
366
- logger.debug(f"ASS: {len(events)} events across {len(segments)} segments")
 
 
367
 
368
 
369
  # ─── Emoji ─────────────────────────────────────────────────────────────────────
@@ -437,7 +760,7 @@ def apply_hre(
437
  transcript: dict,
438
  output_path: Path,
439
  ) -> Path:
440
- """Apply per-segment AI-driven HRE: each 3-5s chunk gets its own zoom + subtitle style."""
441
  output_path.parent.mkdir(parents=True, exist_ok=True)
442
  clip_start = clip_data.get("start", 0.0)
443
 
@@ -464,26 +787,28 @@ def apply_hre(
464
  _analyze_segment(clip_path, seg, i, n, transcript, clip_start, tmp_dir)
465
  for i, seg in enumerate(segments)
466
  ]
 
467
 
468
- for i, (seg, an) in enumerate(zip(segments, analyses)):
469
  logger.info(
470
  f" [{seg['start']:.1f}s-{seg['end']:.1f}s] "
471
  f"zoom={an.get('zoom_direction')}({an.get('zoom_speed')}) "
472
- f"sub={an.get('subtitle_position')}/{an.get('subtitle_color')} "
 
473
  f"type={an.get('moment_type')} energy={an.get('energy_level')}"
474
  )
475
 
476
  # 3. Per-segment zoom via filter_complex
477
  zoomed = _apply_per_segment_zoom(
478
- clip_path, segments, analyses, w, h, tmp_zoomed, has_audio=has_audio
479
  )
480
 
481
  # 4. Per-segment ASS subtitles
482
  ass_path = output_path.with_suffix(".ass")
483
- _generate_per_segment_subtitles(transcript, ass_path, clip_start, segments, analyses)
484
 
485
  # 5. Emoji from highest-energy segment
486
- emoji = _get_emoji(clip_data, analyses)
487
 
488
  # 6. Render
489
  _render_final(zoomed, ass_path, emoji, output_path)
 
1
  """High-Retention Editing pipeline — per-segment AI decisions.
2
 
3
  Each 3-5s segment gets its own zoom direction, subtitle position,
4
+ subtitle mode, and caption color driven by Qwen2.5-VL analyzing one
5
+ frame plus the local transcript for that segment.
6
 
7
  Pipeline per clip:
8
  1. Segment clip at speech pauses (3-5s chunks)
9
  2. Extract midpoint frame from each segment
10
  3. Qwen2.5-VL analyzes each frame → zoom + subtitle decisions
11
  4. ffmpeg filter_complex: per-segment zoompan + concat
12
+ 5. ASS subtitles with per-segment alignment/color/mode override tags
13
  """
14
+ import json
15
  import subprocess
16
  import tempfile
17
  from pathlib import Path
 
177
 
178
  if direction == "in":
179
  if speed == "fast":
180
+ z_expr, max_zoom = "min(1.12+on*0.0018\\,1.55)", 1.55
181
  else:
182
+ z_expr, max_zoom = "min(1.04+on*0.0009\\,1.32)", 1.32
183
  elif direction == "out":
184
  if speed == "fast":
185
+ z_expr, max_zoom = "max(1.48-on*0.0018\\,1.0)", 1.48
186
  else:
187
+ z_expr, max_zoom = "max(1.28-on*0.0009\\,1.0)", 1.28
188
  else: # hold
189
+ z_expr, max_zoom = "1.08", 1.08
190
 
191
  if face_detected and direction == "in" and max_zoom > 1.05:
192
+ x_expr = f"max(0\\,min(iw-iw/zoom\\,iw*{face_cx:.3f}-iw/zoom/2))"
193
+ y_expr = f"max(0\\,min(ih-ih/zoom\\,ih*{face_cy:.3f}-ih/zoom/2))"
 
 
 
 
 
 
 
 
 
 
194
  else:
195
  x_expr = "iw/2-(iw/zoom/2)"
196
  if direction == "in":
197
  y_bias = min(face_cy, 0.5) if face_cy < 0.55 else 0.38
198
+ y_expr = f"max(0\\,min(ih-ih/zoom\\,ih*{y_bias:.2f}-(ih/zoom/2)))"
199
  else:
200
  y_expr = "ih/2-(ih/zoom/2)"
201
 
 
223
  e = f"{seg['end']:.3f}"
224
  z, x, y = _build_zoom_exprs(analysis, w, h)
225
  zp = f"zoompan=z='{z}':x='{x}':y='{y}':d=1:s={w}x{h}:fps=30"
226
+ filter_parts.append(
227
+ f"[0:v]trim=start={s}:end={e},setpts=PTS-STARTPTS,fps=30,{zp},setpts=PTS-STARTPTS[v{i}]"
228
+ )
229
  v_labels.append(f"[v{i}]")
230
  if has_audio:
231
+ filter_parts.append(f"[0:a]atrim=start={s}:end={e},asetpts=PTS-STARTPTS[a{i}]")
232
  a_labels.append(f"[a{i}]")
233
 
234
  n = len(segments)
 
264
  "red": "&H000000FF",
265
  }
266
 
267
+ _POSITIONS = {"top", "bottom", "left", "right", "center"}
268
+ _MODES = {"word", "phrase", "sentence"}
269
+ _EMPHASIS = {"pop", "punch", "calm"}
270
+
271
 
272
  def _ts(t: float) -> str:
273
+ total_cs = max(0, int(round(t * 100)))
274
+ h = total_cs // 360000
275
+ total_cs %= 360000
276
+ m = total_cs // 6000
277
+ total_cs %= 6000
278
+ s = total_cs // 100
279
+ cs = total_cs % 100
280
+ return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
281
+
282
+
283
+ def _pick(value: object, allowed: set[str], fallback: str) -> str:
284
+ v = str(value or "").strip().lower()
285
+ return v if v in allowed else fallback
286
+
287
+
288
+ def _normalise_analysis(analysis: dict, seg_idx: int, n_total: int) -> dict:
289
+ """Validate model output and fill HRE fields used by the renderer."""
290
+ an = dict(analysis or {})
291
+ energy = _pick(an.get("energy_level"), {"high", "medium", "low"}, "medium")
292
+ moment = _pick(
293
+ an.get("moment_type"),
294
+ {"hook", "punchline", "context", "reaction", "transition"},
295
+ "context",
296
+ )
297
+
298
+ fallback_mode = "word" if energy == "high" or moment in {"hook", "punchline", "reaction"} else "sentence"
299
+ if energy == "medium" and moment not in {"context", "transition"}:
300
+ fallback_mode = "phrase"
301
+
302
+ pos = _pick(an.get("subtitle_position"), _POSITIONS, "bottom")
303
+ mode = _pick(an.get("subtitle_mode"), _MODES, fallback_mode)
304
+ emphasis = _pick(an.get("subtitle_emphasis"), _EMPHASIS, "punch" if mode == "word" else "calm")
305
+ color = _pick(an.get("subtitle_color"), set(_ASS_COLORS), "white")
306
+ zoom_direction = _pick(an.get("zoom_direction"), {"in", "out", "hold"}, "in")
307
+ zoom_speed = _pick(an.get("zoom_speed"), {"fast", "slow"}, "slow")
308
+
309
+ try:
310
+ face_cx = min(1.0, max(0.0, float(an.get("face_cx", 0.5))))
311
+ face_cy = min(1.0, max(0.0, float(an.get("face_cy", 0.38))))
312
+ except Exception:
313
+ face_cx, face_cy = 0.5, 0.38
314
+
315
+ if seg_idx == 0:
316
+ zoom_direction, zoom_speed = "in", "fast"
317
+ if mode == "sentence":
318
+ mode = "word"
319
+ if emphasis == "calm":
320
+ emphasis = "punch"
321
+
322
+ return {
323
+ **an,
324
+ "zoom_direction": zoom_direction,
325
+ "zoom_speed": zoom_speed,
326
+ "face_detected": bool(an.get("face_detected", False)),
327
+ "face_cx": face_cx,
328
+ "face_cy": face_cy,
329
+ "subtitle_position": pos,
330
+ "subtitle_mode": mode,
331
+ "subtitle_emphasis": emphasis,
332
+ "subtitle_color": color,
333
+ "energy_level": energy,
334
+ "moment_type": moment,
335
+ }
336
+
337
+
338
+ def _build_hre_plan(segments: list[dict], analyses: list[dict]) -> list[dict]:
339
+ plan = []
340
+ n_total = len(segments)
341
+ for i, (seg, analysis) in enumerate(zip(segments, analyses)):
342
+ an = _normalise_analysis(analysis, i, n_total)
343
+ plan.append({**an, "segment_index": i, "start": seg["start"], "end": seg["end"]})
344
+
345
+ # If the model repeats the same caption treatment for every segment, rotate
346
+ # through safe defaults so HRE visibly changes across the clip.
347
+ if len(plan) > 1 and len({(p["subtitle_position"], p["subtitle_mode"]) for p in plan}) == 1:
348
+ positions = ["bottom", "top", "left", "bottom", "right", "top"]
349
+ modes = ["word", "sentence", "phrase", "word", "sentence", "phrase"]
350
+ for i, p in enumerate(plan):
351
+ p["subtitle_position"] = positions[i % len(positions)]
352
+ p["subtitle_mode"] = modes[i % len(modes)]
353
+ if p["subtitle_mode"] == "word":
354
+ p["subtitle_emphasis"] = "punch"
355
+
356
+ return plan
357
+
358
+
359
+ def _ass_escape(text: str) -> str:
360
+ return (
361
+ text.replace("{", "(")
362
+ .replace("}", ")")
363
+ .replace("\r", " ")
364
+ .replace("\n", " ")
365
+ .strip()
366
+ )
367
+
368
+
369
+ def _wrap_text(text: str, max_chars: int) -> str:
370
+ text = _ass_escape(text)
371
+ if len(text) <= max_chars:
372
+ return text
373
+
374
+ words = text.split()
375
+ if len(words) <= 1:
376
+ return r"\N".join(text[i:i + max_chars] for i in range(0, len(text), max_chars))
377
+
378
+ lines: list[str] = []
379
+ line = ""
380
+ for word in words:
381
+ candidate = f"{line} {word}".strip()
382
+ if line and len(candidate) > max_chars:
383
+ lines.append(line)
384
+ line = word
385
+ else:
386
+ line = candidate
387
+ if line:
388
+ lines.append(line)
389
+
390
+ if len(lines) <= 2:
391
+ return r"\N".join(lines)
392
+ return r"\N".join([lines[0], " ".join(lines[1:])])
393
+
394
+
395
+ def _collect_clip_words(transcript: dict, clip_start: float, duration: float) -> list[dict]:
396
+ words: list[dict] = []
397
+ for seg in transcript.get("segments", []):
398
+ seg_start = float(seg.get("start", clip_start)) - clip_start
399
+ seg_end = float(seg.get("end", clip_start)) - clip_start
400
+ for word in seg.get("words", []):
401
+ text = str(word.get("word", word.get("text", ""))).strip()
402
+ if not text:
403
+ continue
404
+ start = float(word.get("start", seg_start + clip_start)) - clip_start
405
+ end = float(word.get("end", word.get("start", seg_end + clip_start))) - clip_start
406
+ if end <= start:
407
+ end = start + 0.24
408
+ if end <= 0 or start >= duration:
409
+ continue
410
+ words.append({
411
+ "start": max(0.0, start),
412
+ "end": min(duration, end),
413
+ "text": text,
414
+ })
415
+ return sorted(words, key=lambda w: (w["start"], w["end"]))
416
+
417
+
418
+ def _segment_text(transcript: dict, clip_start: float, seg: dict) -> str:
419
+ parts: list[str] = []
420
+ for item in transcript.get("segments", []):
421
+ start = float(item.get("start", clip_start)) - clip_start
422
+ end = float(item.get("end", clip_start)) - clip_start
423
+ if start < seg["end"] and end > seg["start"]:
424
+ text = str(item.get("text", "")).strip()
425
+ if text:
426
+ parts.append(text)
427
+ return " ".join(parts).strip()
428
+
429
+
430
+ def _words_in_segment(words: list[dict], seg: dict) -> list[dict]:
431
+ return [
432
+ w for w in words
433
+ if w["start"] < seg["end"] and w["end"] > seg["start"]
434
+ ]
435
+
436
+
437
+ def _display_text(text: str, mode: str, emphasis: str) -> str:
438
+ text = text.strip()
439
+ if mode == "sentence" and emphasis == "calm":
440
+ return text
441
+ return text.upper()
442
+
443
+
444
+ def _append_event(events: list[dict], start: float, end: float, text: str, plan: dict) -> None:
445
+ start = max(float(plan["start"]), start)
446
+ end = min(float(plan["end"]), end)
447
+ if end - start < 0.08 or not text.strip():
448
+ return
449
+ events.append({
450
+ "start": start,
451
+ "end": end,
452
+ "text": text.strip(),
453
+ "plan": plan,
454
+ })
455
+
456
+
457
+ def _word_events(words: list[dict], seg: dict, plan: dict) -> list[dict]:
458
+ events: list[dict] = []
459
+ cursor = seg["start"]
460
+ min_d = 0.14 if plan["energy_level"] == "high" else 0.18
461
+ max_d = 0.72 if plan["energy_level"] == "high" else 0.95
462
+
463
+ for i, word in enumerate(words):
464
+ start = max(seg["start"], word["start"], cursor)
465
+ next_start = words[i + 1]["start"] if i + 1 < len(words) else seg["end"]
466
+ natural_end = max(word["end"], start + min_d)
467
+ end = min(seg["end"], natural_end, start + max_d)
468
+ if next_start > start:
469
+ end = min(end, max(start + min_d, next_start - 0.015))
470
+ if end <= start:
471
+ end = min(seg["end"], start + min_d)
472
+
473
+ _append_event(events, start, end, word["text"], plan)
474
+ cursor = end + 0.015
475
+ if cursor >= seg["end"]:
476
+ break
477
+
478
+ return events
479
+
480
+
481
+ def _line_events(
482
+ words: list[dict],
483
+ seg: dict,
484
+ plan: dict,
485
+ max_words: int,
486
+ max_duration: float,
487
+ max_chars: int,
488
+ ) -> list[dict]:
489
+ events: list[dict] = []
490
+ i = 0
491
+ cursor = seg["start"]
492
+
493
+ while i < len(words) and cursor < seg["end"] - 0.08:
494
+ group: list[dict] = []
495
+ start = max(seg["start"], words[i]["start"], cursor)
496
+ end = start
497
+ chars = 0
498
+
499
+ while i < len(words):
500
+ word = words[i]
501
+ proposed_end = min(seg["end"], max(word["end"], word["start"] + 0.2))
502
+ proposed_chars = chars + len(word["text"]) + (1 if group else 0)
503
+ if group and (
504
+ len(group) >= max_words
505
+ or proposed_end - start > max_duration
506
+ or proposed_chars > max_chars
507
+ ):
508
+ break
509
+ group.append(word)
510
+ chars = proposed_chars
511
+ end = max(end, proposed_end)
512
+ i += 1
513
+
514
+ if not group:
515
+ i += 1
516
+ continue
517
+
518
+ end = min(seg["end"], max(end, start + 0.55))
519
+ text = " ".join(w["text"] for w in group)
520
+ _append_event(events, start, end, text, plan)
521
+ cursor = end + 0.04
522
+
523
+ return events
524
+
525
+
526
+ def _fallback_text_events(text: str, seg: dict, plan: dict) -> list[dict]:
527
+ if not text:
528
+ return []
529
+
530
+ mode = plan["subtitle_mode"]
531
+ if mode == "word":
532
+ chunk_size = 1
533
+ elif mode == "phrase":
534
+ chunk_size = 3
535
+ else:
536
+ chunk_size = 7
537
+
538
+ units = text.split()
539
+ if len(units) <= 1 and len(text) > 20:
540
+ step = 10 if mode == "word" else 24 if mode == "phrase" else 36
541
+ units = [text[i:i + step] for i in range(0, len(text), step)]
542
+
543
+ chunks = [" ".join(units[i:i + chunk_size]) for i in range(0, len(units), chunk_size)]
544
+ chunks = [c for c in chunks if c.strip()]
545
+ if not chunks:
546
+ return []
547
+
548
+ events: list[dict] = []
549
+ seg_d = max(0.1, seg["end"] - seg["start"])
550
+ dur = seg_d / len(chunks)
551
+ for i, chunk in enumerate(chunks):
552
+ start = seg["start"] + i * dur
553
+ end = seg["start"] + (i + 1) * dur
554
+ _append_event(events, start, end, chunk, plan)
555
+ return events
556
+
557
+
558
+ def _build_subtitle_events(
559
+ transcript: dict,
560
+ clip_start: float,
561
+ duration: float,
562
+ segments: list[dict],
563
+ plan: list[dict],
564
+ ) -> list[dict]:
565
+ words = _collect_clip_words(transcript, clip_start, duration)
566
+ events: list[dict] = []
567
+
568
+ for seg, seg_plan in zip(segments, plan):
569
+ seg_words = _words_in_segment(words, seg)
570
+ mode = seg_plan["subtitle_mode"]
571
+
572
+ if seg_words and mode == "word":
573
+ seg_events = _word_events(seg_words, seg, seg_plan)
574
+ elif seg_words and mode == "phrase":
575
+ seg_events = _line_events(seg_words, seg, seg_plan, max_words=3, max_duration=1.7, max_chars=28)
576
+ elif seg_words:
577
+ seg_events = _line_events(seg_words, seg, seg_plan, max_words=7, max_duration=2.8, max_chars=44)
578
+ else:
579
+ seg_events = []
580
+
581
+ if not seg_events:
582
+ seg_events = _fallback_text_events(_segment_text(transcript, clip_start, seg), seg, seg_plan)
583
+ events.extend(seg_events)
584
+
585
+ events = sorted(events, key=lambda ev: (ev["start"], ev["end"]))
586
+
587
+ # ASS draws all active events at once; keep one visible caption event at a
588
+ # time so word/phrase/sentence modes never stack on top of each other.
589
+ cleaned: list[dict] = []
590
+ cursor = 0.0
591
+ for ev in events:
592
+ start = max(ev["start"], cursor)
593
+ end = min(duration, ev["end"])
594
+ if end - start < 0.08:
595
+ continue
596
+ cleaned.append({**ev, "start": start, "end": end})
597
+ cursor = end + 0.01
598
+ return cleaned
599
+
600
+
601
+ def _subtitle_tag(plan: dict) -> tuple[str, int]:
602
+ pos = plan["subtitle_position"]
603
+ mode = plan["subtitle_mode"]
604
+ energy = plan["energy_level"]
605
+ emphasis = plan["subtitle_emphasis"]
606
+ color = _ASS_COLORS.get(plan["subtitle_color"], "&H00FFFFFF")
607
+
608
+ anchors = {
609
+ "top": (8, 540, 230),
610
+ "bottom": (2, 540, 1660),
611
+ "left": (4, 95, 960),
612
+ "right": (6, 985, 960),
613
+ "center": (5, 540, 960),
614
+ }
615
+ alignment, x, y = anchors.get(pos, anchors["bottom"])
616
+
617
+ if mode == "sentence":
618
+ font_size = 66 if energy != "high" else 74
619
+ max_chars = 34
620
+ elif mode == "phrase":
621
+ font_size = 82 if energy != "low" else 76
622
+ max_chars = 24
623
+ else:
624
+ font_size = 102 if energy == "high" else 92
625
+ max_chars = 18
626
+
627
+ if pos in {"left", "right"}:
628
+ font_size -= 8
629
+ max_chars = min(max_chars, 22)
630
+
631
+ base = (
632
+ f"{{\\an{alignment}\\pos({x},{y})\\1c{color}&\\fs{font_size}"
633
+ "\\b1\\bord5\\shad1\\q2}}"
634
+ )
635
+ if emphasis in {"pop", "punch"} or mode == "word":
636
+ base += "{\\fscx125\\fscy125\\t(0,120,\\fscx100\\fscy100)}"
637
+ return base, max_chars
638
 
639
 
640
  def _generate_per_segment_subtitles(
 
644
  segments: list[dict],
645
  analyses: list[dict],
646
  ) -> None:
647
+ """Write one ASS file from the HRE plan.
 
648
 
649
+ The important rule is that HRE can change style every segment, but it must
650
+ never emit simultaneous caption events at the same timestamp.
651
+ """
652
+ duration = max((float(seg["end"]) for seg in segments), default=0.0)
653
+ plan = _build_hre_plan(segments, analyses)
654
+ events = _build_subtitle_events(transcript, clip_start, duration, segments, plan)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
 
656
  lines = [
657
  "[Script Info]",
 
665
  "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, "
666
  "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
667
  "Alignment, MarginL, MarginR, MarginV, Encoding",
668
+ "Style: Default,Noto Sans,82,&H00FFFFFF,&H0000FFFF,&H00000000,&H80000000,"
669
+ "-1,0,0,0,100,100,0,0,1,5,1,2,40,40,200,1",
670
  "",
671
  "[Events]",
672
  "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
673
  ]
674
 
675
  for ev in events:
676
+ seg_plan = ev["plan"]
677
+ tag, max_chars = _subtitle_tag(seg_plan)
678
+ text = _display_text(ev["text"], seg_plan["subtitle_mode"], seg_plan["subtitle_emphasis"])
679
+ text = _wrap_text(text, max_chars)
 
 
 
 
 
 
 
 
 
 
680
 
681
  lines.append(
682
  f"Dialogue: 0,{_ts(ev['start'])},{_ts(ev['end'])},"
683
+ f"Default,,0,0,0,,{tag}{text}"
684
  )
685
 
686
  ass_path.write_text("\n".join(lines), encoding="utf-8")
687
+ plan_path = ass_path.with_suffix(".hre_plan.json")
688
+ plan_path.write_text(json.dumps(plan, ensure_ascii=False, indent=2), encoding="utf-8")
689
+ logger.debug(f"ASS: {len(events)} events across {len(segments)} HRE segments")
690
 
691
 
692
  # ─── Emoji ─────────────────────────────────────────────────────────────────────
 
760
  transcript: dict,
761
  output_path: Path,
762
  ) -> Path:
763
+ """Apply per-segment AI-driven HRE with varied zoom and caption plans."""
764
  output_path.parent.mkdir(parents=True, exist_ok=True)
765
  clip_start = clip_data.get("start", 0.0)
766
 
 
787
  _analyze_segment(clip_path, seg, i, n, transcript, clip_start, tmp_dir)
788
  for i, seg in enumerate(segments)
789
  ]
790
+ plan = _build_hre_plan(segments, analyses)
791
 
792
+ for i, (seg, an) in enumerate(zip(segments, plan)):
793
  logger.info(
794
  f" [{seg['start']:.1f}s-{seg['end']:.1f}s] "
795
  f"zoom={an.get('zoom_direction')}({an.get('zoom_speed')}) "
796
+ f"sub={an.get('subtitle_position')}/{an.get('subtitle_mode')}/"
797
+ f"{an.get('subtitle_color')} "
798
  f"type={an.get('moment_type')} energy={an.get('energy_level')}"
799
  )
800
 
801
  # 3. Per-segment zoom via filter_complex
802
  zoomed = _apply_per_segment_zoom(
803
+ clip_path, segments, plan, w, h, tmp_zoomed, has_audio=has_audio
804
  )
805
 
806
  # 4. Per-segment ASS subtitles
807
  ass_path = output_path.with_suffix(".ass")
808
+ _generate_per_segment_subtitles(transcript, ass_path, clip_start, segments, plan)
809
 
810
  # 5. Emoji from highest-energy segment
811
+ emoji = _get_emoji(clip_data, plan)
812
 
813
  # 6. Render
814
  _render_final(zoomed, ass_path, emoji, output_path)
frontend/components/ClipSettings.tsx CHANGED
@@ -40,8 +40,8 @@ const L = {
40
  normalTitle: "Normal Subtitles",
41
  normalDesc: "Customize font, colors, animations",
42
  hreTitle: "High-Retention",
43
- hreDesc: "AI picks everything + auto-zoom + jump cuts",
44
- hreInfo: "AI will auto-select font/colors/animation, remove silence, zoom on faces, and add emoji overlays.",
45
  },
46
  th: {
47
  style: "สไตล์คลิป",
@@ -53,8 +53,8 @@ const L = {
53
  normalTitle: "ซับปกติ",
54
  normalDesc: "เลือกรูปแบบซับได้เอง",
55
  hreTitle: "High-Retention",
56
- hreDesc: "AI เลือกทุกอย่าง + auto-zoom + jump cuts",
57
- hreInfo: "AI จะเลือก font/สี/animation + ตัด silence + zoom หน้า + ใส่ emoji ให้อัตโนมัติ",
58
  },
59
  zh: {
60
  style: "片段风格",
@@ -66,8 +66,8 @@ const L = {
66
  normalTitle: "普通字幕",
67
  normalDesc: "自定义字体、颜色、动画",
68
  hreTitle: "高留存",
69
- hreDesc: "AI 自动处理 + 自动缩放 + 跳切",
70
- hreInfo: "AI 将自动选择字体/颜色/动画,去除静音段,放大人脸,并添加表情覆盖。",
71
  },
72
  } as const;
73
 
 
40
  normalTitle: "Normal Subtitles",
41
  normalDesc: "Customize font, colors, animations",
42
  hreTitle: "High-Retention",
43
+ hreDesc: "AI picks timing, captions, and zoom",
44
+ hreInfo: "AI will create a per-segment edit plan, vary caption placement/mode, zoom on key moments, and add emoji overlays.",
45
  },
46
  th: {
47
  style: "สไตล์คลิป",
 
53
  normalTitle: "ซับปกติ",
54
  normalDesc: "เลือกรูปแบบซับได้เอง",
55
  hreTitle: "High-Retention",
56
+ hreDesc: "AI เลือกจังหวะ ซับ และซูมให้",
57
+ hreInfo: "AI จะสร้างแผนตัดต่อรายช่วง เลือกตำแหน่ง/รูปแบบซับ ซูมช่วงสำคัญ และใส่ emoji ให้อัตโนมัติ",
58
  },
59
  zh: {
60
  style: "片段风格",
 
66
  normalTitle: "普通字幕",
67
  normalDesc: "自定义字体、颜色、动画",
68
  hreTitle: "高留存",
69
+ hreDesc: "AI 自动选择节奏、字幕和缩放",
70
+ hreInfo: "AI 将生成分段剪辑计划,调整字幕位置/模式,放大关键时刻,并添加表情覆盖。",
71
  },
72
  } as const;
73
 
frontend/messages/en.json CHANGED
@@ -20,7 +20,7 @@
20
  "mode_label": "Editing Mode",
21
  "normal_mode": "Normal Subtitles",
22
  "hre_mode": "High-Retention Editing (AI decides)",
23
- "hre_hint": "AI will auto-select caption style, apply auto-zoom, silence removal, and TikTok-style cuts."
24
  },
25
  "step3": {
26
  "title": "Subtitle Designer",
 
20
  "mode_label": "Editing Mode",
21
  "normal_mode": "Normal Subtitles",
22
  "hre_mode": "High-Retention Editing (AI decides)",
23
+ "hre_hint": "AI will create a per-segment edit plan with varied captions, auto-zoom, and TikTok-style emphasis."
24
  },
25
  "step3": {
26
  "title": "Subtitle Designer",
frontend/messages/th.json CHANGED
@@ -23,7 +23,7 @@
23
  "mode_label": "โหมดการตัด",
24
  "normal_mode": "ซับปกติ",
25
  "hre_mode": "High-Retention Editing (AI เลือกให้)",
26
- "hre_hint": "AI จะเลือกรูปแบบซับ จัด auto-zoom และแบบ TikTok ให้อัตโนมัติ"
27
  },
28
  "step3": {
29
  "title": "ออกแบบซับไตเติ้ล",
 
23
  "mode_label": "โหมดการตัด",
24
  "normal_mode": "ซับปกติ",
25
  "hre_mode": "High-Retention Editing (AI เลือกให้)",
26
+ "hre_hint": "AI จะสร้างแผนตัดต่อรายช่วง เลือกซับหลายรูปแบบ จัด auto-zoom และเน้นจังหวะแบบ TikTok"
27
  },
28
  "step3": {
29
  "title": "ออกแบบซับไตเติ้ล",
frontend/messages/zh.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "nav": { "brand": "ElevenClip AI", "tagline": "AI智能剪辑精彩片段" },
3
  "step1": { "title": "添加视频", "upload_tab": "上传文件", "youtube_tab": "YouTube链接", "drop_hint": "拖放视频文件到此处,或点击选择", "youtube_placeholder": "粘贴YouTube链接...", "channel_label": "频道描述(可选)", "channel_placeholder": "例如:中文游戏频道,专注于搞笑时刻", "fetch_info": "获取信息" },
4
- "step2": { "title": "剪辑设置", "style_label": "剪辑风格", "duration_label": "目标时长(秒)", "count_label": "剪辑数量", "clip_lang_label": "视频语言", "sub_lang_label": "字幕语言", "mode_label": "编辑模式", "normal_mode": "普通字幕", "hre_mode": "高留存率编辑(AI决定)", "hre_hint": "AI将自动选择字幕样式应用自动缩放、去除静音进行TikTok风格剪辑。" },
5
  "step3": { "title": "字幕设计", "font_label": "字体", "size_label": "字体大小", "primary_color": "主要颜色", "secondary_color": "卡拉OK颜色", "outline_color": "描边颜色", "shadow_color": "阴影颜色", "outline_size": "描边大小", "shadow_size": "阴影大小", "display_mode": "显示模式", "word_by_word": "逐字", "sentence": "句子", "animation": "动画", "alignment": "对齐", "preview": "预览" },
6
  "generate": { "button": "生成剪辑", "processing": "处理中..." },
7
  "styles": { "funny": "搞笑", "serious": "严肃", "educational": "教育", "gaming": "游戏", "entertainment": "娱乐" },
 
1
  {
2
  "nav": { "brand": "ElevenClip AI", "tagline": "AI智能剪辑精彩片段" },
3
  "step1": { "title": "添加视频", "upload_tab": "上传文件", "youtube_tab": "YouTube链接", "drop_hint": "拖放视频文件到此处,或点击选择", "youtube_placeholder": "粘贴YouTube链接...", "channel_label": "频道描述(可选)", "channel_placeholder": "例如:中文游戏频道,专注于搞笑时刻", "fetch_info": "获取信息" },
4
+ "step2": { "title": "剪辑设置", "style_label": "剪辑风格", "duration_label": "目标时长(秒)", "count_label": "剪辑数量", "clip_lang_label": "视频语言", "sub_lang_label": "字幕语言", "mode_label": "编辑模式", "normal_mode": "普通字幕", "hre_mode": "高留存率编辑(AI决定)", "hre_hint": "AI将生成分段剪辑计划,改变字幕样式、应用自动缩放并突出TikTok节奏。" },
5
  "step3": { "title": "字幕设计", "font_label": "字体", "size_label": "字体大小", "primary_color": "主要颜色", "secondary_color": "卡拉OK颜色", "outline_color": "描边颜色", "shadow_color": "阴影颜色", "outline_size": "描边大小", "shadow_size": "阴影大小", "display_mode": "显示模式", "word_by_word": "逐字", "sentence": "句子", "animation": "动画", "alignment": "对齐", "preview": "预览" },
6
  "generate": { "button": "生成剪辑", "processing": "处理中..." },
7
  "styles": { "funny": "搞笑", "serious": "严肃", "educational": "教育", "gaming": "游戏", "entertainment": "娱乐" },
frontend/next-env.d.ts ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ /// <reference types="next" />
2
+ /// <reference types="next/image-types/global" />
3
+ import "./.next/types/routes.d.ts";
4
+
5
+ // NOTE: This file should not be edited
6
+ // see https://nextjs.org/docs/app/api-reference/config/typescript for more information.