Spaces:
Sleeping
Sleeping
File size: 1,875 Bytes
f6eb269 2660dee b2a9ca6 d9261eb b2a9ca6 2660dee f6eb269 d1226a0 2660dee 91fd985 66e574b 2660dee d9261eb d1226a0 91fd985 d9261eb 2660dee f295daf d9261eb 2660dee d9261eb 2660dee d9261eb b2a9ca6 f6eb269 3c4c4a7 b2a9ca6 f6eb269 a9679ad 91fd985 f6eb269 a9679ad | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | # Import necessary libraries
import os # so we can get our token
import gradio as gr
import torch
from transformers import pipeline
import google.generativeai as genai
# Build the models a single time, when the module is first loaded.
print("Loading models at startup...")

# Speech-to-text: wav2vec2 through the transformers pipeline, processed in
# 30-second chunks.
speech_pipe = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    chunk_length_s=30,
)

# Gemini client: the API key is read from the "geminiapi" environment variable.
apikey = os.environ.get("geminiapi")
genai.configure(api_key=apikey)
model = genai.GenerativeModel(model_name="gemini-2.0-flash")

print("Models loaded successfully!")
def transcript_audio(audio_file) -> str:
    """Transcribe an audio file and return its key points as text.

    The audio is transcribed with the module-level ``speech_pipe``; the
    transcript is then sent to the module-level Gemini ``model`` with a
    prompt asking for 3-5 bullet points.

    Args:
        audio_file: Path of the audio file to process (the Gradio input is
            declared with ``type="filepath"``). ``None`` or an empty string
            means nothing was uploaded.

    Returns:
        The key points produced by Gemini, or a message starting with
        ``"Error processing audio: "`` when no file was supplied, no speech
        was recognised, or any step raised an exception.
    """
    # Guard clause: pressing "Submit" without an upload hands us None; give
    # the user a clear message instead of an opaque pipeline exception.
    if not audio_file:
        return "Error processing audio: no audio file was provided."

    try:
        # Step 1: Transcribe the audio file to text.
        print("Transcribing audio...")
        transcript_result = speech_pipe(audio_file, batch_size=8)
        # The model emits upper-case text; capitalize() already lower-cases
        # everything after the first character, so no separate lower() call
        # is needed.
        transcript_txt = transcript_result["text"].strip().capitalize()
        print(f"Transcription: {transcript_txt}")

        # Nothing was recognised (e.g. silence): skip the Gemini call, which
        # would otherwise be asked to summarise an empty transcript.
        if not transcript_txt:
            return "Error processing audio: no speech was detected in the recording."

        # Step 2: Extract key points using Gemini.
        prompt = (
            "Extract 3-5 key points from this audio transcription. "
            f"Format as bullet points:\n\n{transcript_txt}"
        )
        print("Extracting key points...")
        response = model.generate_content(prompt)

        # Return only the key points, not the transcript.
        return response.text
    except Exception as e:
        # Top-level UI boundary: report the failure in the output box and in
        # the server log rather than letting the request crash.
        message = f"Error processing audio: {e}"
        print(message)
        return message
# --- Gradio Interface ---
# Upload-only audio input; with type="filepath" the handler receives a path
# to the uploaded file.
audio_input = gr.Audio(sources=["upload"], type="filepath")
# Text box that displays whatever string transcript_audio returns.
output_text = gr.Textbox()

iface = gr.Interface(
    fn=transcript_audio,
    inputs=audio_input,
    outputs=output_text,
    title="Audio File Automatic Summarizer",
    description="Upload an audio file. Get a summary.",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()