LT4Ryan committed on
Commit
9b23760
·
verified ·
1 Parent(s): d145ab8

update to include t5 model

Browse files
Files changed (1) hide show
  1. app.py +22 -8
app.py CHANGED
@@ -10,6 +10,7 @@ import os
10
  import tempfile
11
  import gradio.themes as gr_themes
12
  import csv
 
13
 
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
  MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
@@ -18,6 +19,9 @@ MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
18
  model = ASRModel.from_pretrained(model_name=MODEL_NAME)
19
  model.eval()
20
 
 
 
 
21
  def get_audio_segment(audio_path, start_second, end_second):
22
  """
23
  Extract a segment of audio from a given audio file.
@@ -232,16 +236,26 @@ def get_full_transcript(vis_data):
232
  return " ".join([row[2] for row in vis_data if len(row) == 3])
233
 
234
  # Simple summary function (replace with a real model if needed)
 
235
  def summarize_transcript(transcript):
236
- if not transcript:
 
 
 
237
  return "No transcript available to summarize."
238
- # Placeholder: just return first 2 sentences or 200 chars
239
- import re
240
- sentences = re.split(r'(?<=[.!?]) +', transcript)
241
- summary = " ".join(sentences[:2])
242
- if len(summary) < 40:
243
- summary = transcript[:200] + ("..." if len(transcript) > 200 else "")
244
- return summary
 
 
 
 
 
 
245
 
246
  # Apply the custom theme
247
 
 
10
  import tempfile
11
  import gradio.themes as gr_themes
12
  import csv
13
+ from transformers import pipeline
14
 
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
  MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
 
19
  model = ASRModel.from_pretrained(model_name=MODEL_NAME)
20
  model.eval()
21
 
22
+ # Load the summarization model once at startup
23
+ summarizer = pipeline("summarization", model="Falconsai/text_summarization", device=device)
24
+
25
  def get_audio_segment(audio_path, start_second, end_second):
26
  """
27
  Extract a segment of audio from a given audio file.
 
236
  return " ".join([row[2] for row in vis_data if len(row) == 3])
237
 
238
  # Simple summary function (replace with a real model if needed)
239
+ # Replace the old summarize_transcript function with this one
240
def summarize_transcript(transcript):
    """Summarize *transcript* with the module-level `summarizer` pipeline.

    Returns a fixed message for empty or whitespace-only input, the model's
    summary text on success, and an empty string (after surfacing a Gradio
    warning) if the pipeline raises.
    """
    if not transcript or not transcript.strip():
        return "No transcript available to summarize."

    try:
        gr.Info("Generating summary...", duration=2)
        # The summarization pipeline yields a list of dicts,
        # e.g. [{"summary_text": "..."}]; take the first entry.
        outputs = summarizer(transcript, max_length=250, min_length=50, do_sample=False)
        return outputs[0]['summary_text']
    except Exception as exc:
        # Boundary handler: log to the console for debugging, then show a
        # friendly UI warning rather than crashing the app.
        print(f"An error occurred during summarization: {exc}")
        gr.Warning("Sorry, the summary could not be generated at this time.")
        return ""  # Empty summary signals failure to the caller
259
 
260
  # Apply the custom theme
261