"""Gradio app: transcribe an audio file with Whisper and export the text as a PDF."""

import functools
import os
import tempfile

from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import simpleSplit
from reportlab.pdfgen import canvas
import gradio as gr
import whisper

# UI-facing model names mapped to the Whisper checkpoints they select.
_MODEL_MAP = {
    "Cheetah": "base",
    "Whale": "small",
    "Dolphin": "medium",
}
_DEFAULT_WHISPER_MODEL = "base"

# PDF layout. Coordinates are in points on a US-letter page.
_PDF_BASENAME = "transcription.pdf"
_PDF_FONT_NAME = "Helvetica"
_PDF_FONT_SIZE = 12
_PDF_LEADING = _PDF_FONT_SIZE * 1.2  # distance between consecutive baselines
_PDF_MARGIN = 50                     # left, right and bottom margin
_PDF_TOP_Y = 750                     # baseline of the first line on each page


@functools.lru_cache(maxsize=1)
def _load_whisper_model(whisper_model_name: str):
    """Load a Whisper model, reusing the most recently loaded one.

    Only one model is cached so that switching models does not keep several
    sets of weights in memory at the same time.
    """
    return whisper.load_model(whisper_model_name)


def _wrap_for_pdf(text: str, max_width: float) -> list:
    """Split *text* into lines no wider than *max_width* points.

    Explicit newlines in *text* are kept as line breaks, and blank lines are
    preserved as empty output lines.
    """
    wrapped = []
    for paragraph in text.split("\n"):
        pieces = simpleSplit(paragraph, _PDF_FONT_NAME, _PDF_FONT_SIZE, max_width)
        # simpleSplit returns nothing for an empty/whitespace-only paragraph;
        # keep an empty line so the vertical spacing of the text survives.
        wrapped.extend(pieces or [""])
    return wrapped


def _write_pdf(text: str, pdf_path: str) -> None:
    """Render *text* to *pdf_path*, wrapping long lines and adding pages as needed."""
    page_width, _ = letter
    max_width = page_width - 2 * _PDF_MARGIN
    lines_per_page = int((_PDF_TOP_Y - _PDF_MARGIN) // _PDF_LEADING) + 1
    lines = _wrap_for_pdf(text, max_width)

    pdf = canvas.Canvas(pdf_path, pagesize=letter)
    for start in range(0, len(lines), lines_per_page):
        text_object = pdf.beginText(_PDF_MARGIN, _PDF_TOP_Y)
        text_object.setFont(_PDF_FONT_NAME, _PDF_FONT_SIZE, leading=_PDF_LEADING)
        for line in lines[start:start + lines_per_page]:
            text_object.textLine(line)
        pdf.drawText(text_object)
        pdf.showPage()  # finish this page before starting the next one
    pdf.save()


def transcribe_audio(audio_path: str, model_name: str):
    """
    Transcribes an audio file to text using the Whisper model and saves it as a PDF.

    Args:
        audio_path: The path to the audio file.
        model_name: The name of the Whisper model to use ("Cheetah", "Whale", or
            "Dolphin"). Unknown names fall back to the "base" Whisper model.

    Returns:
        A tuple containing the transcribed text and the path to the generated PDF.
        The PDF is named "transcription.pdf" and is written to a fresh temporary
        directory for every call, so concurrent requests do not overwrite each
        other's output.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file before transcribing.")

    # Map the custom model name to an actual Whisper model name.
    whisper_model_name = _MODEL_MAP.get(model_name, _DEFAULT_WHISPER_MODEL)

    # Load (or reuse) the model and transcribe the audio.
    model = _load_whisper_model(whisper_model_name)
    result = model.transcribe(audio_path)
    transcribed_text = result["text"]

    # NOTE(review): Helvetica is a built-in PDF font; transcriptions in
    # non-Latin scripts presumably need a registered TrueType font - confirm.
    output_dir = tempfile.mkdtemp(prefix="transcription_")
    pdf_filename = os.path.join(output_dir, _PDF_BASENAME)
    _write_pdf(transcribed_text, pdf_filename)

    return transcribed_text, pdf_filename


# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio File"),
        gr.Radio(
            ["Cheetah", "Whale", "Dolphin"],
            label="Select Whisper Model",
            value="Cheetah",
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download PDF"),
    ],
    title="Audio Transcription App",
    description="Upload an audio file and select a Whisper model to get the transcription and a downloadable PDF.",
)

# Launch the interface - Note: For deployment on Hugging Face Spaces,
# the interface will be launched automatically by the Space environment.
# You might not need the .launch() call in the final app.py for Spaces.
# However, for testing locally before deployment, keep the .launch() call.
# This app.py file is intended for `gradio deploy`, so the .launch() call is
# commented out below; the Space is expected to handle the launch.
# interface.launch()  # Commented out for Spaces deployment