hamza2923 committed on
Commit
99e2ed9
·
verified ·
1 Parent(s): 5ece0ce

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +76 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+ from pydub import AudioSegment
5
+ import os
6
+
7
+ # Initialize the Whisper model
8
# Load the Whisper speech-recognition pipeline once at import time so every
# request reuses the same model. The GPU is used when CUDA is available,
# otherwise the pipeline runs on the CPU.
try:
    whisper = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
except Exception as e:
    # Fail fast with a clear message; chain the original error explicitly so
    # the root cause stays visible in the traceback.
    raise RuntimeError(f"Failed to load Whisper model: {str(e)}") from e
16
+
17
+ # Define the transcription function with chunking and automatic language detection
18
def transcribe_audio(audio):
    """Transcribe an uploaded audio file to text with the Whisper pipeline.

    Audio longer than 30 seconds is split into 30-second WAV chunks that are
    transcribed one at a time and joined with single spaces; shorter audio is
    handed to the pipeline directly. Only the "transcribe" task is requested;
    no language is forced.

    Args:
        audio: Filesystem path of the uploaded audio file, or None when
            nothing was uploaded.

    Returns:
        The transcription text, or a human-readable message starting with
        "Error" when the input is missing, too large, or cannot be processed.

    Side effects:
        The uploaded file is deleted once processing has been attempted.
    """
    # Local import keeps this function self-contained.
    import tempfile

    max_size_mb = 100
    chunk_length_ms = 30000  # 30 seconds
    generate_kwargs = {"task": "transcribe"}

    if audio is None:
        return "Error: Please upload an audio file."

    # Validate file size (100 MB limit).
    try:
        file_size_mb = os.path.getsize(audio) / (1024 * 1024)
    except FileNotFoundError:
        return "Error: Audio file not found."
    if file_size_mb > max_size_mb:
        return "Error: Audio file exceeds 100 MB limit."

    try:
        audio_segment = AudioSegment.from_file(audio)
        duration_ms = len(audio_segment)

        # Short audio: transcribe the uploaded file as-is.
        if duration_ms <= chunk_length_ms:
            result = whisper(audio, generate_kwargs=generate_kwargs)
            return result["text"]

        # Long audio: write chunks into a private temporary directory so that
        # concurrent requests cannot overwrite each other's chunk files and
        # every chunk is removed even when export or transcription fails.
        transcriptions = []
        with tempfile.TemporaryDirectory() as chunk_dir:
            starts = range(0, duration_ms, chunk_length_ms)
            for index, start_ms in enumerate(starts):
                chunk = audio_segment[start_ms:start_ms + chunk_length_ms]
                chunk_path = os.path.join(chunk_dir, f"chunk_{index}.wav")
                # export() returns the open output file; close it right away
                # so the handle is not left dangling.
                chunk.export(chunk_path, format="wav").close()
                result = whisper(chunk_path, generate_kwargs=generate_kwargs)
                transcriptions.append(result["text"])
        return " ".join(transcriptions)

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error during transcription: {str(e)}"
    finally:
        # Best-effort clean-up of the uploaded file; it may already be gone.
        try:
            os.remove(audio)
        except OSError:
            pass
61
+
62
+ # Create Gradio interface
63
# Single audio-upload input; the handler receives the upload as a file path.
audio_input = gr.Audio(
    type="filepath",
    label="Upload an Audio File (MP3, WAV, max 100 MB)",
)

# Text box that shows either the transcription or an error message.
transcription_output = gr.Textbox(label="Transcription")

# Assemble the web UI around the transcription handler, with flagging disabled.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=[audio_input],
    outputs=transcription_output,
    title="Audio to Text Transcription with Whisper",
    description="Upload an audio file (MP3/WAV, up to 100 MB) to transcribe it using Open AI's Whisper model with automatic language detection.",
    allow_flagging="never",
)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers==4.44.2
2
+ gradio==4.44.0
3
+ torch==2.4.1
4
+ pydub==0.25.1
5
+ ffmpeg-python==0.2.0