LT4Ryan committed on
Commit
9b23760
·
verified ·
1 Parent(s): d145ab8

update to include t5 model

Browse files
Files changed (1) hide show
  1. app.py +22 -8
app.py CHANGED
@@ -10,6 +10,7 @@ import os
10
  import tempfile
11
  import gradio.themes as gr_themes
12
  import csv
 
13
 
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
  MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
@@ -18,6 +19,9 @@ MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
18
  model = ASRModel.from_pretrained(model_name=MODEL_NAME)
19
  model.eval()
20
 
 
 
 
21
  def get_audio_segment(audio_path, start_second, end_second):
22
  """
23
  Extract a segment of audio from a given audio file.
@@ -232,16 +236,26 @@ def get_full_transcript(vis_data):
232
  return " ".join([row[2] for row in vis_data if len(row) == 3])
233
 
234
  # Simple summary function (replace with a real model if needed)
 
235
  def summarize_transcript(transcript):
236
- if not transcript:
 
 
 
237
  return "No transcript available to summarize."
238
- # Placeholder: just return first 2 sentences or 200 chars
239
- import re
240
- sentences = re.split(r'(?<=[.!?]) +', transcript)
241
- summary = " ".join(sentences[:2])
242
- if len(summary) < 40:
243
- summary = transcript[:200] + ("..." if len(transcript) > 200 else "")
244
- return summary
 
 
 
 
 
 
245
 
246
  # Apply the custom theme
247
 
 
10
  import tempfile
11
  import gradio.themes as gr_themes
12
  import csv
13
+ from transformers import pipeline
14
 
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
  MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
 
19
  model = ASRModel.from_pretrained(model_name=MODEL_NAME)
20
  model.eval()
21
 
22
+ # Load the summarization model once at startup
23
+ summarizer = pipeline("summarization", model="Falconsai/text_summarization", device=device)
24
+
25
  def get_audio_segment(audio_path, start_second, end_second):
26
  """
27
  Extract a segment of audio from a given audio file.
 
236
  return " ".join([row[2] for row in vis_data if len(row) == 3])
237
 
238
  # Simple summary function (replace with a real model if needed)
239
+ # Replace the old summarize_transcript function with this one
240
def summarize_transcript(transcript):
    """Summarize *transcript* with the module-level `summarizer` pipeline.

    Returns a fixed message for empty or whitespace-only input, the model's
    summary text on success, and an empty string (after surfacing a Gradio
    warning) if the pipeline raises.
    """
    if not transcript or not transcript.strip():
        return "No transcript available to summarize."

    try:
        gr.Info("Generating summary...", duration=2)
        # The summarization pipeline yields a list of dicts,
        # e.g. [{"summary_text": "..."}]; take the first entry.
        outputs = summarizer(transcript, max_length=250, min_length=50, do_sample=False)
        return outputs[0]['summary_text']
    except Exception as exc:
        # Boundary handler: log to the console for debugging, then show a
        # friendly UI warning rather than crashing the app.
        print(f"An error occurred during summarization: {exc}")
        gr.Warning("Sorry, the summary could not be generated at this time.")
        return ""  # Empty summary signals failure to the caller
259
 
260
  # Apply the custom theme
261