Spaces:
Sleeping
Sleeping
| # Import necessary libraries | |
| import os # so we can get our token | |
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
| import google.generativeai as genai | |
# --- One-time model initialization at import time ---
# Loading here (rather than per request) keeps each transcription call fast.
print("Loading models at startup...")

# ASR pipeline; chunking lets wav2vec2 process audio longer than its window.
speech_pipe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    chunk_length_s=30,
)

# Configure the Gemini client from the environment (Space secret).
api_key = os.environ.get("geminiapi")  # get our token 🔥
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-2.0-flash')
print("Models loaded successfully!")
def transcript_audio(audio_file):
    """Transcribe an audio file and return 3-5 bullet-point key points.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded audio, as supplied by
        ``gr.Audio(type="filepath")``.

    Returns
    -------
    str
        Bullet-point key points produced by Gemini, or an
        ``"Error processing audio: ..."`` message if any step fails.
    """
    try:
        # Step 1: speech-to-text. wav2vec2 emits ALL-CAPS text;
        # str.capitalize() already lowercases everything after the first
        # character, so the previous extra .lower() was redundant.
        print("Transcribing audio...")
        transcript_result = speech_pipe(audio_file, batch_size=8)
        transcript_txt = transcript_result["text"].capitalize()
        print(f"Transcription: {transcript_txt}")

        # Step 2: extract key points using Gemini.
        prompt = f"Extract 3-5 key points from this audio transcription. Format as bullet points:\n\n{transcript_txt}"
        print("Extracting key points...")
        response = model.generate_content(prompt)
        # Return only the key points text for the Gradio output box.
        return response.text
    except Exception as e:
        # Surface the failure in the UI instead of crashing the request.
        return f"Error processing audio: {str(e)}"
# --- Gradio UI wiring ---
# Upload-only audio widget; "filepath" hands transcript_audio a path string.
audio_input = gr.Audio(type="filepath", sources=["upload"])
# Plain textbox for the bullet-point summary (or an error message).
output_text = gr.Textbox()

iface = gr.Interface(
    transcript_audio,
    audio_input,
    output_text,
    title="Audio File Automatic Summarizer",
    description="Upload an audio file. Get a summary.",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()