Grinding committed on
Commit
27f255b
·
verified ·
1 Parent(s): 626176b

Update app/processing.py

Browse files
Files changed (1) hide show
  1. app/processing.py +77 -31
app/processing.py CHANGED
@@ -26,18 +26,28 @@ except Exception as e:
26
  logger.error(f"Failed to initialize Groq client: {e}")
27
 
28
  # --- Prompts ---
29
- SUMMARIZATION_SYSTEM_PROMPT = """
30
- You are an expert AI assistant specializing in creating concise, structured, and insightful summaries of meeting and lecture transcripts. Your goal is to distill the most critical information into a format that is easy to read and act upon.
31
 
32
  Instructions:
33
- 1. **Identify Core Themes**: Begin by identifying the main topics and objectives discussed.
34
  2. **Extract Key Decisions**: Pinpoint any decisions that were made, including the rationale behind them if available.
35
- 3. **Highlight Main Outcomes**: Detail the primary results or conclusions reached during the discussion.
 
 
 
 
 
 
 
 
 
 
36
  4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
37
  5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias.
38
  """
39
  ACTION_ITEMS_SYSTEM_PROMPT = """
40
- You are a highly specialized AI assistant tasked with identifying and extracting actionable tasks, commitments, and deadlines from a meeting or lecture transcript. Your output must be clear, concise, and formatted as a JSON object.
41
 
42
  Instructions:
43
  1. **Identify Actionable Language**: Scan the text for phrases indicating a task, such as "will send," "is responsible for," "we need to," "I'll follow up on," etc.
@@ -80,6 +90,39 @@ async def transcribe_chunk(chunk_index: int, audio_chunk: AudioSegment):
80
  logger.error(f"Error transcribing chunk {chunk_index + 1}: {e}")
81
  return (chunk_index, f"[TRANSCRIPTION FAILED FOR SEGMENT {chunk_index+1}]")
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
84
  if not groq_client:
85
  tasks_db[task_id] = {"status": "failed", "result": "Groq client is not initialized. Check API key."}
@@ -138,48 +181,51 @@ async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
138
  logger.info(f"Running {len(transcription_tasks)} transcription tasks in parallel...")
139
  transcription_results = await asyncio.gather(*transcription_tasks)
140
 
141
- # Sort results by index and join
142
  transcription_results.sort(key=lambda x: x[0])
143
- full_transcript = "\n".join([text for index, text in transcription_results])
 
144
 
145
  if not full_transcript.strip():
146
  raise ValueError("Transcription result is empty.")
147
 
148
- # --- Final Analysis with Groq LLM ---
149
- logger.info("Starting final analysis with Groq LLM...")
150
 
151
- summary_task = asyncio.to_thread(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  groq_client.chat.completions.create,
153
  model="qwen/qwen3-32b",
154
- messages=[{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT}, {"role": "user", "content": full_transcript}],
155
- temperature=0.6,
156
- reasoning_effort="default",
157
- reasoning_format="hidden",
158
  max_tokens=1024
159
  )
160
 
161
- action_item_task = asyncio.to_thread(
162
- groq_client.chat.completions.create,
163
- model="qwen/qwen3-32b",
164
- messages=[{"role": "system", "content": ACTION_ITEMS_SYSTEM_PROMPT}, {"role": "user", "content": full_transcript}],
165
- temperature=0.6,
166
- reasoning_effort="default",
167
- reasoning_format="hidden",
168
- max_tokens=1024,
169
- response_format={"type": "json_object"}
170
- )
171
-
172
- summary_completion, action_item_completion = await asyncio.gather(summary_task, action_item_task)
173
-
174
- summary = summary_completion.choices[0].message.content
175
- action_items = json.loads(action_item_completion.choices[0].message.content).get("action_items", [])
176
 
177
  logger.info(f"Final analysis complete for task {task_id}.")
178
 
179
  final_result = {
180
  "transcript": full_transcript,
181
- "summary": summary,
182
- "action_items": action_items,
183
  }
184
  tasks_db[task_id] = {"status": "complete", "result": final_result}
185
 
 
26
  logger.error(f"Failed to initialize Groq client: {e}")
27
 
28
  # --- Prompts ---
29
+ CHUNK_SUMMARIZATION_SYSTEM_PROMPT = """
30
+ You are an expert AI assistant specializing in creating concise, structured, and insightful summaries of parts of meeting and lecture transcripts. This is a segment of a larger transcript. Your goal is to distill the most critical information into a format that is easy to read.
31
 
32
  Instructions:
33
+ 1. **Identify Core Themes**: Begin by identifying the main topics and objectives discussed in this segment.
34
  2. **Extract Key Decisions**: Pinpoint any decisions that were made, including the rationale behind them if available.
35
+ 3. **Highlight Main Outcomes**: Detail the primary results or conclusions reached in this segment.
36
+ 4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
37
+ 5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias.
38
+ """
39
# System prompt for the reduce step: merges the per-segment summaries
# produced with CHUNK_SUMMARIZATION_SYSTEM_PROMPT into one final summary.
FINAL_SUMMARIZATION_SYSTEM_PROMPT = """
You are an expert AI assistant specializing in combining multiple segment summaries into a single concise, structured, and insightful summary of the entire meeting or lecture. Your goal is to distill the most critical information from all segments into a format that is easy to read and act upon.

Instructions:
1. **Identify Overall Core Themes**: Synthesize the main topics and objectives from all segments.
2. **Extract Key Decisions**: Compile any decisions made across segments, including rationales if available.
3. **Highlight Main Outcomes**: Detail the primary results or conclusions from the entire discussion.
4. **Structure the Output**: Present the summary in a clean, professional format. Use bullet points for clarity.
5. **Maintain Neutrality**: The summary should be objective and free of personal interpretation or bias.
"""
49
  ACTION_ITEMS_SYSTEM_PROMPT = """
50
+ You are a highly specialized AI assistant tasked with identifying and extracting actionable tasks, commitments, and deadlines from a segment of a meeting or lecture transcript. Your output must be clear, concise, and formatted as a JSON object.
51
 
52
  Instructions:
53
  1. **Identify Actionable Language**: Scan the text for phrases indicating a task, such as "will send," "is responsible for," "we need to," "I'll follow up on," etc.
 
90
  logger.error(f"Error transcribing chunk {chunk_index + 1}: {e}")
91
  return (chunk_index, f"[TRANSCRIPTION FAILED FOR SEGMENT {chunk_index+1}]")
92
 
93
async def process_transcript_chunk(chunk_index: int, chunk_text: str) -> tuple[int, str, list]:
    """Summarize one transcript chunk and extract its action items.

    Runs the summarization and action-item LLM calls concurrently (each in a
    worker thread, since the Groq client is synchronous).

    Args:
        chunk_index: Zero-based position of this chunk in the transcript.
        chunk_text: The transcript text of this chunk.

    Returns:
        (chunk_index, summary, action_items) — the index is returned so the
        caller can re-sort results after asyncio.gather. On total failure the
        summary is the sentinel "[SUMMARY FAILED]" and action_items is [].
    """
    logger.info(f"Starting processing for transcript chunk {chunk_index + 1}...")
    try:
        summary_task = asyncio.to_thread(
            groq_client.chat.completions.create,
            model="qwen/qwen3-32b",
            messages=[{"role": "system", "content": CHUNK_SUMMARIZATION_SYSTEM_PROMPT}, {"role": "user", "content": chunk_text}],
            temperature=0.2,
            max_tokens=512
        )

        action_task = asyncio.to_thread(
            groq_client.chat.completions.create,
            model="qwen/qwen3-32b",
            messages=[{"role": "system", "content": ACTION_ITEMS_SYSTEM_PROMPT}, {"role": "user", "content": chunk_text}],
            temperature=0.1,
            max_tokens=512,
            response_format={"type": "json_object"}
        )

        summary_completion, action_completion = await asyncio.gather(summary_task, action_task)

        summary = summary_completion.choices[0].message.content

        # Parse action items separately so a malformed JSON payload does not
        # discard the already-computed summary for this chunk.
        action_items: list = []
        try:
            action_items_json = json.loads(action_completion.choices[0].message.content)
            # The model is asked for {"action_items": [...]}, but guard against
            # a non-dict payload or a non-list value: downstream code calls
            # list.extend() on this, which would silently iterate dict keys.
            if isinstance(action_items_json, dict):
                parsed = action_items_json.get("action_items", [])
                if isinstance(parsed, list):
                    action_items = parsed
        except (json.JSONDecodeError, TypeError) as e:
            logger.error(f"Error parsing action items for transcript chunk {chunk_index + 1}: {e}")

        logger.info(f"Finished processing for transcript chunk {chunk_index + 1}.")
        return (chunk_index, summary, action_items)
    except Exception as e:
        # Best-effort: a failed chunk must not abort the whole pipeline.
        logger.error(f"Error processing transcript chunk {chunk_index + 1}: {e}")
        return (chunk_index, "[SUMMARY FAILED]", [])
125
+
126
  async def run_pipeline(task_id: str, file_path: Path, tasks_db: dict):
127
  if not groq_client:
128
  tasks_db[task_id] = {"status": "failed", "result": "Groq client is not initialized. Check API key."}
 
181
  logger.info(f"Running {len(transcription_tasks)} transcription tasks in parallel...")
182
  transcription_results = await asyncio.gather(*transcription_tasks)
183
 
184
+ # Sort results by index
185
  transcription_results.sort(key=lambda x: x[0])
186
+ chunk_transcripts = [text for index, text in transcription_results]
187
+ full_transcript = "\n".join(chunk_transcripts)
188
 
189
  if not full_transcript.strip():
190
  raise ValueError("Transcription result is empty.")
191
 
192
+ # --- Chunked Analysis with Groq LLM ---
193
+ logger.info("Starting chunked analysis with Groq LLM...")
194
 
195
+ processing_tasks = []
196
+ for i, chunk_text in enumerate(chunk_transcripts):
197
+ processing_tasks.append(process_transcript_chunk(i, chunk_text))
198
+
199
+ processing_results = await asyncio.gather(*processing_tasks)
200
+
201
+ # Sort by index
202
+ processing_results.sort(key=lambda x: x[0])
203
+
204
+ chunk_summaries = [summary for index, summary, actions in processing_results]
205
+ all_action_items = []
206
+ for index, summary, actions in processing_results:
207
+ all_action_items.extend(actions)
208
+
209
+ # Combine chunk summaries into final summary
210
+ combined_summaries = "\n\n---\n\n".join([f"Segment {i+1}:\n{summary}" for i, summary in enumerate(chunk_summaries)])
211
+
212
+ final_summary_task = asyncio.to_thread(
213
  groq_client.chat.completions.create,
214
  model="qwen/qwen3-32b",
215
+ messages=[{"role": "system", "content": FINAL_SUMMARIZATION_SYSTEM_PROMPT}, {"role": "user", "content": combined_summaries}],
216
+ temperature=0.2,
 
 
217
  max_tokens=1024
218
  )
219
 
220
+ final_summary_completion = await final_summary_task
221
+ final_summary = final_summary_completion.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
  logger.info(f"Final analysis complete for task {task_id}.")
224
 
225
  final_result = {
226
  "transcript": full_transcript,
227
+ "summary": final_summary,
228
+ "action_items": all_action_items,
229
  }
230
  tasks_db[task_id] = {"status": "complete", "result": final_result}
231