# app.py — Hugging Face Space: audio transcription + Gemini key-point summary
# (removed web-page residue from a copy-paste: author avatar line, "Update app.py",
#  commit "91fd985 verified")
# Import necessary libraries
import os # so we can get our token
import gradio as gr
import torch
from transformers import pipeline
import google.generativeai as genai
# --- One-time startup: load models at import so each request is fast ---
print("Loading models at startup...")

# Speech-to-text pipeline. wav2vec2-base-960h emits uppercase English text;
# chunk_length_s=30 lets the pipeline process audio longer than the model's
# native window by splitting it into 30-second chunks.
speech_pipe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    chunk_length_s=30,
)

# Configure Gemini from the Space secret "geminiapi".
# NOTE(review): os.getenv returns None when the secret is missing —
# genai.configure(api_key=None) will not fail here, only at request time;
# consider failing fast with a clear message. TODO confirm desired behavior.
apikey = os.getenv("geminiapi")
genai.configure(api_key=apikey)
model = genai.GenerativeModel('gemini-2.0-flash')
print("Models loaded successfully!")
def transcript_audio(audio_file):
    """Transcribe an uploaded audio file and summarize it into key points.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``),
        or ``None`` when the user submits without uploading a file.

    Returns
    -------
    str
        Bullet-point key points from Gemini, or a human-readable error
        message (Gradio displays whatever string is returned).
    """
    # Gradio passes None when no file was uploaded; fail with a friendly
    # message instead of a cryptic pipeline exception.
    if audio_file is None:
        return "Please upload an audio file first."
    try:
        # Step 1: speech-to-text. wav2vec2 emits ALL-CAPS text, so
        # normalize the casing before showing/forwarding it.
        print("Transcribing audio...")
        transcript_result = speech_pipe(audio_file, batch_size=8)
        transcript_txt = transcript_result["text"].lower().capitalize()
        print(f"Transcription: {transcript_txt}")

        # Nothing recognized (e.g. a silent clip): tell the user rather
        # than asking Gemini to summarize an empty string.
        if not transcript_txt.strip():
            return "No speech could be recognized in this audio file."

        # Step 2: ask Gemini for a bullet-point summary of the transcript.
        prompt = f"Extract 3-5 key points from this audio transcription. Format as bullet points:\n\n{transcript_txt}"
        print("Extracting key points...")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Broad catch is deliberate: Gradio renders the returned string,
        # so any failure surfaces in the UI instead of a stack trace.
        return f"Error processing audio: {str(e)}"
# --- Gradio Interface ---
# Components are built once at module level and wired into a
# single-function app: upload an audio file, read back the key points.
audio_input = gr.Audio(sources=["upload"], type="filepath")
output_text = gr.Textbox()

iface = gr.Interface(
    fn=transcript_audio,
    inputs=audio_input,
    outputs=output_text,
    title="Audio File Automatic Summarizer",
    description="Upload an audio file. Get a summary.",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()