X-Transcriber / app.py
nujoka's picture
Upload folder using huggingface_hub
cefcf02 verified
import os
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import gradio as gr
import whisper
# PDF layout, in points (72 pt = 1 inch) on a US-letter page.
_PDF_FONT_NAME = "Helvetica"
_PDF_FONT_SIZE = 12
_PDF_LEADING = 14.4  # distance between baselines: 1.2 x font size
_PDF_MARGIN = 50     # left/right/bottom margin
_PDF_TOP_Y = 750     # baseline of the first line on every page

# Single-slot cache {whisper_model_name: model}. Only the most recently used
# model is kept so memory stays bounded while repeated requests with the same
# model skip the expensive reload.
_model_cache = {}


def _get_whisper_model(whisper_model_name):
    """Return the Whisper model with this name, loading it only when needed."""
    model = _model_cache.get(whisper_model_name)
    if model is None:
        model = whisper.load_model(whisper_model_name)
        _model_cache.clear()
        _model_cache[whisper_model_name] = model
    return model


def _write_transcript_pdf(text, pdf_path):
    """Write ``text`` to ``pdf_path``, wrapped to the page width and paginated."""
    from reportlab.lib.utils import simpleSplit

    page_width = letter[0]
    max_width = page_width - 2 * _PDF_MARGIN

    # Wrap each paragraph by measured string width; keep blank lines so the
    # paragraph structure of the transcript survives.
    # NOTE: Helvetica is a standard PDF font and only covers Latin characters.
    lines = []
    for paragraph in text.split("\n"):
        wrapped = simpleSplit(paragraph, _PDF_FONT_NAME, _PDF_FONT_SIZE, max_width)
        lines.extend(wrapped or [""])

    # Number of baselines that fit between the top line and the bottom margin.
    lines_per_page = int((_PDF_TOP_Y - _PDF_MARGIN) // _PDF_LEADING) + 1

    pdf = canvas.Canvas(pdf_path, pagesize=letter)
    for start in range(0, len(lines), lines_per_page):
        text_obj = pdf.beginText(_PDF_MARGIN, _PDF_TOP_Y)
        text_obj.setFont(_PDF_FONT_NAME, _PDF_FONT_SIZE, leading=_PDF_LEADING)
        for line in lines[start:start + lines_per_page]:
            text_obj.textLine(line)
        pdf.drawText(text_obj)
        pdf.showPage()  # finish this page before starting the next chunk
    pdf.save()


def transcribe_audio(audio_path: str, model_name: str):
    """
    Transcribe an audio file with Whisper and save the text as a PDF.

    Args:
        audio_path: The path to the audio file.
        model_name: The display name of the Whisper model to use ("Cheetah",
            "Whale", or "Dolphin"). Unknown names fall back to the "base"
            model.

    Returns:
        A tuple containing the transcribed text and the path to the
        generated PDF.

    Raises:
        gr.Error: If no audio file was provided.
    """
    import tempfile

    if not audio_path:
        # Surface a readable message in the UI instead of failing inside Whisper.
        raise gr.Error("Please upload an audio file before transcribing.")

    # Map the custom model names to actual Whisper model names
    model_map = {
        "Cheetah": "base",
        "Whale": "small",
        "Dolphin": "medium",
    }
    whisper_model_name = model_map.get(model_name, "base")

    model = _get_whisper_model(whisper_model_name)
    result = model.transcribe(audio_path)
    transcribed_text = result["text"]

    # Give every request its own directory so simultaneous users never
    # overwrite each other's PDF; the download keeps a stable file name.
    output_dir = tempfile.mkdtemp(prefix="transcription_")
    pdf_filename = os.path.join(output_dir, "transcription.pdf")
    _write_transcript_pdf(transcribed_text, pdf_filename)

    return transcribed_text, pdf_filename
# Define the Gradio interface: an audio upload plus a model selector in,
# the transcription text plus a downloadable PDF out.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio File"),
        gr.Radio(["Cheetah", "Whale", "Dolphin"], label="Select Whisper Model", value="Cheetah"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download PDF"),
    ],
    title="Audio Transcription App",
    description="Upload an audio file and select a Whisper model to get the transcription and a downloadable PDF.",
)

# Start the web server only when this file is executed as a script
# (e.g. `python app.py`), so importing the module has no side effects.
# NOTE(review): a Gradio Space is expected to start the app by running this
# file as a script as well - confirm against the Spaces documentation.
if __name__ == "__main__":
    interface.launch()