"""Gradio app: transcribe an uploaded audio file and summarize it with Gemini."""

import os  # so we can get our token

import gradio as gr
import torch
from transformers import pipeline
import google.generativeai as genai

# Initialize models once at startup (not per-request) so each call is fast.
print("Loading models at startup...")
speech_pipe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    chunk_length_s=30,  # split long audio into 30-second chunks for the model
)

# Configure Gemini. Fail fast with a clear message when the key is missing:
# configuring with api_key=None would otherwise only surface a confusing
# error on the first generate_content() call.
apikey = os.getenv("geminiapi")  # get our token 🔥
if not apikey:
    raise RuntimeError("Environment variable 'geminiapi' is not set; cannot configure Gemini.")
genai.configure(api_key=apikey)
model = genai.GenerativeModel('gemini-2.0-flash')
print("Models loaded successfully!")


def transcript_audio(audio_file):
    """Transcribe an audio file and return Gemini-extracted key points.

    Args:
        audio_file: Filesystem path to the uploaded audio (Gradio ``filepath``).

    Returns:
        Bullet-point key points as a string, or an ``"Error processing
        audio: ..."`` string on failure (returned rather than raised so
        Gradio displays it in the output textbox instead of crashing).
    """
    try:
        # Step 1: Transcribe the audio file to text. wav2vec2 emits
        # ALL-CAPS output, so lower().capitalize() sentence-cases it
        # (in case we want to show the transcription).
        print("Transcribing audio...")
        transcript_result = speech_pipe(audio_file, batch_size=8)
        transcript_txt = transcript_result["text"].lower().capitalize()
        print(f"Transcription: {transcript_txt}")

        # Step 2: Extract key points using Gemini.
        prompt = (
            "Extract 3-5 key points from this audio transcription. "
            f"Format as bullet points:\n\n{transcript_txt}"
        )
        print("Extracting key points...")
        response = model.generate_content(prompt)
        key_points = response.text

        # Return only key points (transcription itself is not shown).
        return key_points
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the Gradio worker crash.
        return f"Error processing audio: {str(e)}"


# --- Gradio Interface ---
audio_input = gr.Audio(sources=["upload"], type="filepath")
output_text = gr.Textbox()

iface = gr.Interface(
    fn=transcript_audio,
    inputs=audio_input,
    outputs=output_text,
    title="Audio File Automatic Summarizer",
    description="Upload an audio file. Get a summary.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()