Grinding committed on
Commit
ae78221
·
verified ·
1 Parent(s): 50dc8da

Update app/processing.py

Browse files
Files changed (1) hide show
  1. app/processing.py +24 -5
app/processing.py CHANGED
@@ -34,7 +34,7 @@ Instructions:
34
  2. **Extract Key Decisions**: Pinpoint any decisions that were made, including the rationale behind them if available.
35
  3. **Highlight Main Outcomes**: Detail the primary results or conclusions reached during the discussion.
36
  4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
37
- 5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias and JSON.
38
  """
39
  ACTION_ITEMS_SYSTEM_PROMPT = """
40
  You are a highly specialized AI assistant tasked with identifying and extracting actionable tasks, commitments, and deadlines from a meeting or lecture transcript. Your output must be clear, concise, and formatted as a JSON object.
@@ -87,10 +87,12 @@ async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
87
  try:
88
  logger.info(f"Starting pipeline for task {task_id} with file {file_path}")
89
 
90
- # Make chunk duration configurable via environment variable, default to 3 minutes
91
  CHUNK_DURATION_S = int(os.getenv("CHUNK_DURATION_S", 120))
92
 
93
  sr = librosa.get_samplerate(str(file_path))
 
 
94
  stream = librosa.stream(
95
  str(file_path),
96
  block_length=int(sr * CHUNK_DURATION_S),
@@ -101,18 +103,35 @@ async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
101
  transcription_tasks = []
102
  for i, y_chunk in enumerate(stream):
103
  logger.info(f"Queuing audio segment {i+1} for transcription...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  pcm_chunk = (y_chunk * 32767).astype(np.int16)
105
 
 
 
106
  audio_segment = AudioSegment(
107
  pcm_chunk.tobytes(),
108
- frame_rate=sr,
109
  sample_width=pcm_chunk.dtype.itemsize,
110
- channels=1
111
  )
112
  transcription_tasks.append(transcribe_chunk(i, audio_segment))
113
 
114
  # Clean up memory explicitly
115
- del pcm_chunk, y_chunk
116
  gc.collect()
117
 
118
  # Run all transcription tasks in parallel
 
34
  2. **Extract Key Decisions**: Pinpoint any decisions that were made, including the rationale behind them if available.
35
  3. **Highlight Main Outcomes**: Detail the primary results or conclusions reached during the discussion.
36
  4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
37
+ 5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias.
38
  """
39
  ACTION_ITEMS_SYSTEM_PROMPT = """
40
  You are a highly specialized AI assistant tasked with identifying and extracting actionable tasks, commitments, and deadlines from a meeting or lecture transcript. Your output must be clear, concise, and formatted as a JSON object.
 
87
  try:
88
  logger.info(f"Starting pipeline for task {task_id} with file {file_path}")
89
 
90
+ # Make chunk duration configurable via environment variable, default to 120 seconds
91
  CHUNK_DURATION_S = int(os.getenv("CHUNK_DURATION_S", 120))
92
 
93
  sr = librosa.get_samplerate(str(file_path))
94
+ target_sr = 16000 # Resample to 16kHz for Whisper compatibility and smaller file size
95
+
96
  stream = librosa.stream(
97
  str(file_path),
98
  block_length=int(sr * CHUNK_DURATION_S),
 
103
  transcription_tasks = []
104
  for i, y_chunk in enumerate(stream):
105
  logger.info(f"Queuing audio segment {i+1} for transcription...")
106
+
107
+ # Ensure y_chunk is 2D
108
+ if y_chunk.ndim == 1:
109
+ y_chunk = y_chunk.reshape(-1, 1)
110
+
111
+ # Mix to mono if multi-channel
112
+ if y_chunk.shape[1] > 1:
113
+ y_chunk = np.mean(y_chunk, axis=1, keepdims=True)
114
+
115
+ # Resample to target_sr
116
+ if sr != target_sr:
117
+ y_chunk = librosa.resample(y_chunk, orig_sr=sr, target_sr=target_sr, axis=0)
118
+
119
+ current_sr = target_sr
120
+
121
  pcm_chunk = (y_chunk * 32767).astype(np.int16)
122
 
123
+ channels = y_chunk.shape[1] # Should be 1
124
+
125
  audio_segment = AudioSegment(
126
  pcm_chunk.tobytes(),
127
+ frame_rate=current_sr,
128
  sample_width=pcm_chunk.dtype.itemsize,
129
+ channels=channels
130
  )
131
  transcription_tasks.append(transcribe_chunk(i, audio_segment))
132
 
133
  # Clean up memory explicitly
134
+ del pcm_chunk, y_chunk, audio_segment
135
  gc.collect()
136
 
137
  # Run all transcription tasks in parallel