update to include t5 model
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import os
|
|
| 10 |
import tempfile
|
| 11 |
import gradio.themes as gr_themes
|
| 12 |
import csv
|
|
|
|
| 13 |
|
| 14 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 15 |
MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
|
|
@@ -18,6 +19,9 @@ MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
|
|
| 18 |
model = ASRModel.from_pretrained(model_name=MODEL_NAME)
|
| 19 |
model.eval()
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
def get_audio_segment(audio_path, start_second, end_second):
|
| 22 |
"""
|
| 23 |
Extract a segment of audio from a given audio file.
|
|
@@ -232,16 +236,26 @@ def get_full_transcript(vis_data):
|
|
| 232 |
return " ".join([row[2] for row in vis_data if len(row) == 3])
|
| 233 |
|
| 234 |
# Simple summary function (replace with a real model if needed)
|
|
|
|
| 235 |
def summarize_transcript(transcript):
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
| 237 |
return "No transcript available to summarize."
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
# Apply the custom theme
|
| 247 |
|
|
|
|
| 10 |
import tempfile
|
| 11 |
import gradio.themes as gr_themes
|
| 12 |
import csv
|
| 13 |
+
from transformers import pipeline
|
| 14 |
|
| 15 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 16 |
MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
|
|
|
|
| 19 |
model = ASRModel.from_pretrained(model_name=MODEL_NAME)
|
| 20 |
model.eval()
|
| 21 |
|
| 22 |
+
# Load the summarization model a single time at startup so every request
# reuses the same pipeline instead of re-downloading / re-initializing it.
SUMMARIZER_MODEL_ID = "Falconsai/text_summarization"
summarizer = pipeline("summarization", model=SUMMARIZER_MODEL_ID, device=device)
|
| 24 |
+
|
| 25 |
def get_audio_segment(audio_path, start_second, end_second):
|
| 26 |
"""
|
| 27 |
Extract a segment of audio from a given audio file.
|
|
|
|
| 236 |
return " ".join([row[2] for row in vis_data if len(row) == 3])
|
| 237 |
|
| 238 |
# Simple summary function (replace with a real model if needed)
|
| 239 |
+
def summarize_transcript(transcript):
    """Summarize the transcript using the Falconsai/text_summarization model.

    Args:
        transcript: Full transcript text (may be empty or None).

    Returns:
        The generated summary string; the fixed
        "No transcript available to summarize." message for empty input;
        or "" when the summarization pipeline raises.
    """
    if not transcript or not transcript.strip():
        return "No transcript available to summarize."

    try:
        gr.Info("Generating summary...", duration=2)
        # Clamp the requested summary length to the input size: a fixed
        # max_length=250 makes the T5 pipeline warn (and pad) whenever the
        # transcript is shorter than the summary it is asked to produce.
        word_count = len(transcript.split())
        max_len = max(20, min(250, word_count))
        min_len = min(50, max(5, max_len // 2))
        # truncation=True deliberately trims transcripts that exceed the
        # model's input context instead of letting the call fail.
        # The pipeline returns a list of dictionaries, one per input.
        result = summarizer(
            transcript,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            truncation=True,
        )
        # Extract the summary text from the first (and only) result.
        return result[0]['summary_text']
    except Exception as e:
        error_message = f"An error occurred during summarization: {e}"
        print(error_message)  # Log the error to the console for debugging
        gr.Warning("Sorry, the summary could not be generated at this time.")
        return ""  # Return an empty string so the UI degrades gracefully
|
| 259 |
|
| 260 |
# Apply the custom theme
|
| 261 |
|