Spaces:
Sleeping
Sleeping
Update app/processing.py
Browse files - app/processing.py +24 -5
app/processing.py
CHANGED
|
@@ -34,7 +34,7 @@ Instructions:
|
|
| 34 |
2. **Extract Key Decisions**: Pinpoint any decisions that were made, including the rationale behind them if available.
|
| 35 |
3. **Highlight Main Outcomes**: Detail the primary results or conclusions reached during the discussion.
|
| 36 |
4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
|
| 37 |
-
5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias
|
| 38 |
"""
|
| 39 |
ACTION_ITEMS_SYSTEM_PROMPT = """
|
| 40 |
You are a highly specialized AI assistant tasked with identifying and extracting actionable tasks, commitments, and deadlines from a meeting or lecture transcript. Your output must be clear, concise, and formatted as a JSON object.
|
|
@@ -87,10 +87,12 @@ async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
|
|
| 87 |
try:
|
| 88 |
logger.info(f"Starting pipeline for task {task_id} with file {file_path}")
|
| 89 |
|
| 90 |
-
# Make chunk duration configurable via environment variable, default to
|
| 91 |
CHUNK_DURATION_S = int(os.getenv("CHUNK_DURATION_S", 120))
|
| 92 |
|
| 93 |
sr = librosa.get_samplerate(str(file_path))
|
|
|
|
|
|
|
| 94 |
stream = librosa.stream(
|
| 95 |
str(file_path),
|
| 96 |
block_length=int(sr * CHUNK_DURATION_S),
|
|
@@ -101,18 +103,35 @@ async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
|
|
| 101 |
transcription_tasks = []
|
| 102 |
for i, y_chunk in enumerate(stream):
|
| 103 |
logger.info(f"Queuing audio segment {i+1} for transcription...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
pcm_chunk = (y_chunk * 32767).astype(np.int16)
|
| 105 |
|
|
|
|
|
|
|
| 106 |
audio_segment = AudioSegment(
|
| 107 |
pcm_chunk.tobytes(),
|
| 108 |
-
frame_rate=
|
| 109 |
sample_width=pcm_chunk.dtype.itemsize,
|
| 110 |
-
channels=
|
| 111 |
)
|
| 112 |
transcription_tasks.append(transcribe_chunk(i, audio_segment))
|
| 113 |
|
| 114 |
# Clean up memory explicitly
|
| 115 |
-
del pcm_chunk, y_chunk
|
| 116 |
gc.collect()
|
| 117 |
|
| 118 |
# Run all transcription tasks in parallel
|
|
|
|
| 34 |
2. **Extract Key Decisions**: Pinpoint any decisions that were made, including the rationale behind them if available.
|
| 35 |
3. **Highlight Main Outcomes**: Detail the primary results or conclusions reached during the discussion.
|
| 36 |
4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
|
| 37 |
+
5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias.
|
| 38 |
"""
|
| 39 |
ACTION_ITEMS_SYSTEM_PROMPT = """
|
| 40 |
You are a highly specialized AI assistant tasked with identifying and extracting actionable tasks, commitments, and deadlines from a meeting or lecture transcript. Your output must be clear, concise, and formatted as a JSON object.
|
|
|
|
| 87 |
try:
|
| 88 |
logger.info(f"Starting pipeline for task {task_id} with file {file_path}")
|
| 89 |
|
| 90 |
+
# Make chunk duration configurable via environment variable, default to 120 seconds
|
| 91 |
CHUNK_DURATION_S = int(os.getenv("CHUNK_DURATION_S", 120))
|
| 92 |
|
| 93 |
sr = librosa.get_samplerate(str(file_path))
|
| 94 |
+
target_sr = 16000 # Resample to 16kHz for Whisper compatibility and smaller file size
|
| 95 |
+
|
| 96 |
stream = librosa.stream(
|
| 97 |
str(file_path),
|
| 98 |
block_length=int(sr * CHUNK_DURATION_S),
|
|
|
|
| 103 |
transcription_tasks = []
|
| 104 |
for i, y_chunk in enumerate(stream):
|
| 105 |
logger.info(f"Queuing audio segment {i+1} for transcription...")
|
| 106 |
+
|
| 107 |
+
# Ensure y_chunk is 2D
|
| 108 |
+
if y_chunk.ndim == 1:
|
| 109 |
+
y_chunk = y_chunk.reshape(-1, 1)
|
| 110 |
+
|
| 111 |
+
# Mix to mono if multi-channel
|
| 112 |
+
if y_chunk.shape[1] > 1:
|
| 113 |
+
y_chunk = np.mean(y_chunk, axis=1, keepdims=True)
|
| 114 |
+
|
| 115 |
+
# Resample to target_sr
|
| 116 |
+
if sr != target_sr:
|
| 117 |
+
y_chunk = librosa.resample(y_chunk, orig_sr=sr, target_sr=target_sr, axis=0)
|
| 118 |
+
|
| 119 |
+
current_sr = target_sr
|
| 120 |
+
|
| 121 |
pcm_chunk = (y_chunk * 32767).astype(np.int16)
|
| 122 |
|
| 123 |
+
channels = y_chunk.shape[1] # Should be 1
|
| 124 |
+
|
| 125 |
audio_segment = AudioSegment(
|
| 126 |
pcm_chunk.tobytes(),
|
| 127 |
+
frame_rate=current_sr,
|
| 128 |
sample_width=pcm_chunk.dtype.itemsize,
|
| 129 |
+
channels=channels
|
| 130 |
)
|
| 131 |
transcription_tasks.append(transcribe_chunk(i, audio_segment))
|
| 132 |
|
| 133 |
# Clean up memory explicitly
|
| 134 |
+
del pcm_chunk, y_chunk, audio_segment
|
| 135 |
gc.collect()
|
| 136 |
|
| 137 |
# Run all transcription tasks in parallel
|