Stylique committed on
Commit
9534a84
·
verified ·
1 Parent(s): bfbb2bb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -16
app.py CHANGED
@@ -99,12 +99,11 @@ def background_process(job_id: str, req: ProcessRequest):
99
  file=audio_file,
100
  model="whisper-1",
101
  response_format="verbose_json",
102
- timestamp_granularities=["segment"]
103
  )
104
  segments = transcript.segments
105
  print(f"[{job_id}] Whisper analysis complete. Found {len(segments)} segments.")
106
  except Exception as e:
107
- # Catch common JSON decoding errors from OpenAI/Network here
108
  print(f"[{job_id}] OpenAI/JSON Error: {traceback.format_exc()}")
109
  raise Exception(f"OpenAI Analysis Error: {str(e)}")
110
 
@@ -112,27 +111,35 @@ def background_process(job_id: str, req: ProcessRequest):
112
  raise Exception("No speech detected in video")
113
 
114
  # 5. Slice Video and Upload
115
- print(f"[{job_id}] Step 5: Starting slice loop...")
116
  processed_slices = []
117
  total_segments = len(segments)
118
 
119
- # Reduced buffers to avoid repetition while maintaining clean cuts
120
- BUFFER_START = 0.05
121
- BUFFER_END = 0.2
122
-
123
  for i, segment in enumerate(segments):
124
  orig_start = segment.start
125
  orig_end = segment.end
126
 
127
- # Lookahead to avoid overlapping with next segment
128
- next_start = segments[i+1].start if i + 1 < total_segments else float('inf')
129
- # Lookbehind to avoid overlapping with previous segment
130
- prev_end = segments[i-1].end if i > 0 else 0
131
 
132
- # Apply padding but stay within boundaries of adjacent segments
133
- start = max(prev_end, orig_start - BUFFER_START)
134
- end = min(next_start, orig_end + BUFFER_END)
 
 
 
 
 
 
 
 
 
 
 
 
135
 
 
 
136
  text = segment.text.strip()
137
  duration = end - start
138
 
@@ -145,11 +152,14 @@ def background_process(job_id: str, req: ProcessRequest):
145
  output_path = temp_dir / output_filename
146
 
147
  try:
148
- # Precise Slicing
 
 
149
  subprocess.run([
150
  "ffmpeg", "-ss", str(start), "-i", str(video_path), "-t", str(duration), "-y",
151
  "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
152
- "-c:a", "aac", "-b:a", "128k", "-map_metadata", "-1", "-avoid_negative_ts", "make_zero",
 
153
  str(output_path)
154
  ], check=True, capture_output=True)
155
  except subprocess.CalledProcessError as e:
 
99
  file=audio_file,
100
  model="whisper-1",
101
  response_format="verbose_json",
102
+ timestamp_granularities=["segment", "word"]
103
  )
104
  segments = transcript.segments
105
  print(f"[{job_id}] Whisper analysis complete. Found {len(segments)} segments.")
106
  except Exception as e:
 
107
  print(f"[{job_id}] OpenAI/JSON Error: {traceback.format_exc()}")
108
  raise Exception(f"OpenAI Analysis Error: {str(e)}")
109
 
 
111
  raise Exception("No speech detected in video")
112
 
113
  # 5. Slice Video and Upload
114
+ print(f"[{job_id}] Step 5: Starting intelligent slice loop...")
115
  processed_slices = []
116
  total_segments = len(segments)
117
 
 
 
 
 
118
  for i, segment in enumerate(segments):
119
  orig_start = segment.start
120
  orig_end = segment.end
121
 
122
+ # Intelligent Midpoint Slicing:
123
+ # We split the silence between segments 50/50, but with safety caps.
 
 
124
 
125
+ # 5.1 Calculate End Padding (Next Segment Gap)
126
+ if i + 1 < total_segments:
127
+ gap_next = segments[i+1].start - orig_end
128
+ # Split gap, ensure at least 0.05s overlap if tight, cap at 0.3s
129
+ end_padding = max(0.05, min(0.3, gap_next / 2))
130
+ else:
131
+ end_padding = 0.5 # Tail for the last segment
132
+
133
+ # 5.2 Calculate Start Padding (Previous Segment Gap)
134
+ if i > 0:
135
+ gap_prev = orig_start - segments[i-1].end
136
+ # Split gap, ensure at least 0.05s overlap if tight, cap at 0.1s
137
+ start_padding = max(0.05, min(0.1, gap_prev / 2))
138
+ else:
139
+ start_padding = 0.1 # Lead-in for the first segment
140
 
141
+ start = max(0, orig_start - start_padding)
142
+ end = orig_end + end_padding
143
  text = segment.text.strip()
144
  duration = end - start
145
 
 
152
  output_path = temp_dir / output_filename
153
 
154
  try:
155
+ # Precise Slicing with Audio Sync Optimization
156
+ # -ss before -i is fast; -t after -i is precise duration.
157
+ # -af aresample=async=1 ensures audio starts/ends correctly relative to the seek.
158
  subprocess.run([
159
  "ffmpeg", "-ss", str(start), "-i", str(video_path), "-t", str(duration), "-y",
160
  "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
161
+ "-c:a", "aac", "-b:a", "128k", "-af", "aresample=async=1",
162
+ "-map_metadata", "-1", "-avoid_negative_ts", "make_zero",
163
  str(output_path)
164
  ], check=True, capture_output=True)
165
  except subprocess.CalledProcessError as e: