File size: 3,297 Bytes
ba9ada9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import ffmpeg
import os
import docx
import warnings
import assemblyai as aai
import subprocess

# Silence FutureWarning messages for the whole process.
warnings.simplefilter("ignore", category=FutureWarning)

# The AssemblyAI API key is read from the "KeyA" environment variable;
# it is None when that variable is not set.
Key = os.environ.get("KeyA")
aai.settings.api_key = Key

# Function to check if FFmpeg is installed
def is_ffmpeg_installed():
    """Return True when ``ffmpeg -version`` runs successfully, else False.

    False is returned both when the executable cannot be found and when it
    exits with a non-zero status.
    """
    probe_command = ["ffmpeg", "-version"]
    try:
        # capture_output=True pipes stdout and stderr so nothing is printed.
        subprocess.run(probe_command, capture_output=True, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        return False
    return True

# Function to extract audio from video safely
def extract_audio(video_path, output_audio_path="temp_audio.mp3"):
    """Extract the audio track of a video into an MP3 file using FFmpeg.

    Args:
        video_path: Path of the input video file.
        output_audio_path: Path the MP3 is written to. An existing file at
            this path is overwritten.

    Returns:
        ``output_audio_path``, once FFmpeg has finished successfully.

    Raises:
        RuntimeError: If FFmpeg is not available, or if the FFmpeg run fails
            (the original ``ffmpeg.Error`` is attached as ``__cause__``).
    """
    if not is_ffmpeg_installed():
        raise RuntimeError("FFmpeg is not installed or not found in PATH.")

    try:
        ffmpeg.input(video_path).output(output_audio_path, format="mp3").run(overwrite_output=True, quiet=True)
    except ffmpeg.Error as e:
        # stderr can be missing or contain bytes that are not valid UTF-8;
        # never let building the message hide the real FFmpeg failure.
        details = e.stderr.decode(errors="replace") if e.stderr else str(e)
        raise RuntimeError(f"FFmpeg error: {details}") from e
    return output_audio_path

# Function to transcribe audio using AssemblyAI
def transcribe_audio(file):
    """Transcribe an uploaded audio or video file with AssemblyAI.

    Video files (.mp4, .avi, .mov, .mkv) first have their audio extracted to
    a temporary MP3 via ``extract_audio``; any other file is sent as-is.

    Args:
        file: The uploaded file, either as a path string or as an object
            whose ``name`` attribute holds the path.

    Returns:
        One line per utterance in the form ``"Speaker <label>: <text>"``,
        joined with newlines. If the transcript has no utterances, its plain
        text (or an empty string) is returned instead.

    Raises:
        gr.Error: If no file was provided or AssemblyAI reports a failed
            transcription.
        RuntimeError: Propagated from ``extract_audio`` when FFmpeg fails.
    """
    import tempfile  # local import: only needed for the extraction temp file

    if file is None:
        raise gr.Error("Please upload an audio or video file first.")

    source_path = file if isinstance(file, str) else file.name
    ext = os.path.splitext(source_path)[-1].lower()

    extracted_path = None
    try:
        # Extract audio if video is uploaded
        if ext in (".mp4", ".avi", ".mov", ".mkv"):
            # A unique temp file keeps concurrent requests from overwriting
            # each other's extracted audio.
            fd, extracted_path = tempfile.mkstemp(suffix=".mp3")
            os.close(fd)
            audio_path = extract_audio(source_path, extracted_path)
        else:
            audio_path = source_path  # Use audio file directly

        # Upload file to AssemblyAI
        transcriber = aai.Transcriber()
        config = aai.TranscriptionConfig(speaker_labels=True)
        transcript = transcriber.transcribe(audio_path, config=config)
    finally:
        # The extracted audio is only needed for the upload; do not leave it behind.
        if extracted_path is not None and os.path.exists(extracted_path):
            os.remove(extracted_path)

    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(f"Transcription failed: {transcript.error}")

    utterances = transcript.utterances
    if not utterances:
        return transcript.text or ""

    return "\n".join(f"Speaker {utt.speaker}: {utt.text}" for utt in utterances)

# Function to export transcription
def _srt_timestamp(total_seconds):
    """Format a whole number of seconds as an SRT ``HH:MM:SS,000`` timestamp."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},000"


def save_transcription(text, file_format):
    """Write the transcription to ``transcription.<ext>`` and return the path.

    Args:
        text: The transcription text. ``None`` is treated as an empty string.
        file_format: One of ``"TXT"``, ``"DOCX"`` or ``"SRT"`` (matched
            case-insensitively).

    Returns:
        The relative path of the written file, e.g. ``"transcription.srt"``.

    Raises:
        ValueError: If ``file_format`` is missing or not a supported format.
    """
    fmt = (file_format or "").upper()
    if fmt not in ("TXT", "DOCX", "SRT"):
        raise ValueError(f"Unsupported export format: {file_format!r}")

    text = text or ""
    file_path = f"transcription.{fmt.lower()}"

    if fmt == "TXT":
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)
    elif fmt == "DOCX":
        doc = docx.Document()
        doc.add_paragraph(text)
        doc.save(file_path)
    else:  # SRT
        # No real timing information is available here, so each sentence is
        # given a synthetic 5-second slot. Empty fragments (such as the one
        # after the final period) are skipped so no blank cues are written.
        sentences = [part.strip() for part in text.split(".")]
        sentences = [sentence for sentence in sentences if sentence]
        with open(file_path, "w", encoding="utf-8") as f:
            for i, sentence in enumerate(sentences):
                start_time = _srt_timestamp(i * 5)
                end_time = _srt_timestamp((i + 1) * 5)
                f.write(f"{i+1}\n{start_time} --> {end_time}\n{sentence}\n\n")

    return file_path

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Skroll - Audio & Video Transcription Tool")
    gr.Markdown("Upload an audio or video file and transcribe. Export in .txt, .docx, or .srt format.")

    # Upload -> editable transcript; the textbox is interactive so the text
    # can be corrected before it is exported.
    file_input = gr.File(label="Upload Audio or Video")
    transcript_output = gr.Textbox(label="Transcription", interactive=True, lines=10)
    transcribe_btn = gr.Button("Transcribe")

    with gr.Row():
        # Preselect a format so clicking "Export" without touching the
        # dropdown does not pass None to save_transcription.
        file_format = gr.Dropdown(["TXT", "DOCX", "SRT"], value="TXT", label="Export Format")
        export_btn = gr.Button("Export")

    download_link = gr.File(label="Download Transcription")

    # Define Actions
    # "Transcribe" fills the textbox from the uploaded file; "Export" writes
    # the (possibly edited) textbox content to a file offered for download.
    transcribe_btn.click(transcribe_audio, inputs=[file_input], outputs=transcript_output)
    export_btn.click(save_transcription, inputs=[transcript_output, file_format], outputs=download_link)

# Launch App
demo.launch(debug=True)