File size: 1,875 Bytes
f6eb269
2660dee
b2a9ca6
d9261eb
b2a9ca6
2660dee
f6eb269
d1226a0
 
 
 
 
 
 
2660dee
 
91fd985
 
66e574b
2660dee
d9261eb
 
 
 
 
 
 
 
 
d1226a0
91fd985
d9261eb
 
2660dee
 
f295daf
d9261eb
2660dee
 
d9261eb
2660dee
 
d9261eb
 
 
b2a9ca6
f6eb269
3c4c4a7
b2a9ca6
 
f6eb269
a9679ad
 
 
91fd985
 
f6eb269
 
 
 
a9679ad
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Import necessary libraries
import os # so we can get our token
import gradio as gr
import torch
from transformers import pipeline
import google.generativeai as genai

# Initialize models once at startup (module import time) so each Gradio
# request reuses the already-loaded pipeline instead of reloading per call.
print("Loading models at startup...")
speech_pipe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    chunk_length_s=30,  # split long recordings into 30 s chunks for the ASR model
)

# Configure Gemini from the environment.
# NOTE(review): if the "geminiapi" env var is unset, apikey is None and
# genai.configure() won't fail here — requests will fail later. Verify the
# variable is set in the deployment environment.
apikey = os.getenv("geminiapi")  # API token read from env, never hard-coded
genai.configure(api_key=apikey)
model = genai.GenerativeModel('gemini-2.0-flash')

print("Models loaded successfully!")

def transcript_audio(audio_file):
    """Transcribe an uploaded audio file and summarize it into key points.

    Args:
        audio_file: Filesystem path to the uploaded audio file. Gradio
            passes ``None`` when the user submits without selecting a file.

    Returns:
        str: Bullet-point key points produced by Gemini, or a human-readable
        ``"Error processing audio: ..."`` message on any failure.
    """
    # Gradio sends None when no file was uploaded; fail fast with a clear
    # message instead of letting the ASR pipeline raise a cryptic error.
    if audio_file is None:
        return "Error processing audio: no audio file provided."

    try:
        # Step 1: Transcribe the audio file to text.
        # wav2vec2 emits ALL-CAPS text, so lower-case it and capitalize the
        # first character in case the transcription is shown to the user.
        print("Transcribing audio...")
        transcript_result = speech_pipe(audio_file, batch_size=8)
        transcript_txt = transcript_result["text"].lower().capitalize()
        print(f"Transcription: {transcript_txt}")

        # Silent or empty audio yields an empty transcript; skip the
        # Gemini API call rather than summarizing nothing.
        if not transcript_txt.strip():
            return "Error processing audio: no speech detected in the file."

        # Step 2: Extract key points using Gemini
        prompt = f"Extract 3-5 key points from this audio transcription. Format as bullet points:\n\n{transcript_txt}"

        print("Extracting key points...")
        response = model.generate_content(prompt)
        key_points = response.text

        # Return only key points
        return key_points

    except Exception as e:
        # Top-level UI boundary: surface the failure as display text so the
        # Gradio app shows an error message instead of crashing.
        return f"Error processing audio: {str(e)}"

# --- Gradio Interface ---
# Wire the transcription callback to a minimal upload-in / text-out UI.
# The audio component hands the callback a filesystem path, which is what
# transcript_audio expects.
iface = gr.Interface(
    fn=transcript_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(),
    title="Audio File Automatic Summarizer",
    description="Upload an audio file. Get a summary.",
)

# Start the local web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()