omar1232 committed on
Commit
73daa36
·
verified ·
1 Parent(s): e7c4b74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -10
app.py CHANGED
@@ -4,6 +4,11 @@ from pydub import AudioSegment
4
  import tempfile
5
  from langdetect import detect
6
  import os
 
 
 
 
 
7
 
8
  # Process audio and transcribe
9
  def process_audio(audio_input):
@@ -14,22 +19,25 @@ def process_audio(audio_input):
14
  if isinstance(audio_input, tuple): # Recorded audio (sample_rate, numpy_array)
15
  sample_rate, audio_data = audio_input
16
  AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
17
- else: # Uploaded audio file
18
- # Load the uploaded audio file and convert it to WAV
19
  audio = AudioSegment.from_file(audio_input)
20
  audio = audio.set_channels(1) # Convert to mono for consistency
21
  audio.export(temp_file.name, format="wav")
22
  audio_file_path = temp_file.name
23
 
24
- # Transcribe the WAV file
 
 
 
 
25
  with sr.AudioFile(audio_file_path) as source:
26
  audio = recognizer.record(source)
27
  try:
28
- transcription = recognizer.recognize_google(audio)
29
  except sr.UnknownValueError:
30
  transcription = "Could not understand the audio."
31
- except sr.RequestError:
32
- transcription = "Transcription service unavailable."
33
 
34
  # Detect language
35
  try:
@@ -59,7 +67,33 @@ def audio_transcriptor(audio_file, audio_record):
59
 
60
  return language, transcription, text_file
61
 
62
- # Custom HTML for styled transcription display
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  transcription_html = """
64
  <div class="transcription-container" id="transcriptionContainer">
65
  <h2>Transcription Results</h2>
@@ -103,7 +137,7 @@ transcription_html = """
103
  # Gradio interface
104
  with gr.Blocks() as demo:
105
  gr.Markdown("# Audio Transcriptor")
106
- gr.Markdown("Upload an audio file or record audio to transcribe the speech and detect the language.")
107
 
108
  with gr.Row():
109
  audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
@@ -132,5 +166,27 @@ with gr.Blocks() as demo:
132
  outputs=[audio_file, audio_record]
133
  )
134
 
135
- # Launch Gradio app
136
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import tempfile
5
  from langdetect import detect
6
  import os
7
+ from telegram import Update
8
+ from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
9
+
10
# Telegram bot token, read from the environment (set it as a Hugging Face
# Space secret named TELEGRAM_BOT_TOKEN). os.getenv takes the variable NAME,
# never the secret itself; a token that was ever committed to the repository
# must be treated as compromised and revoked via @BotFather.
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
12
 
13
  # Process audio and transcribe
14
  def process_audio(audio_input):
 
19
  if isinstance(audio_input, tuple): # Recorded audio (sample_rate, numpy_array)
20
  sample_rate, audio_data = audio_input
21
  AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
22
+ else: # Uploaded audio file (file path or Telegram file)
 
23
  audio = AudioSegment.from_file(audio_input)
24
  audio = audio.set_channels(1) # Convert to mono for consistency
25
  audio.export(temp_file.name, format="wav")
26
  audio_file_path = temp_file.name
27
 
28
+ # Debug: Check if the WAV file is valid
29
+ if os.path.getsize(audio_file_path) == 0:
30
+ raise ValueError("The converted WAV file is empty. The input audio may be corrupted.")
31
+
32
+ # Transcribe the WAV file using pocketsphinx (offline)
33
  with sr.AudioFile(audio_file_path) as source:
34
  audio = recognizer.record(source)
35
  try:
36
+ transcription = recognizer.recognize_sphinx(audio) # Use pocketsphinx for offline transcription
37
  except sr.UnknownValueError:
38
  transcription = "Could not understand the audio."
39
+ except sr.RequestError as e:
40
+ transcription = f"Transcription failed: {str(e)}"
41
 
42
  # Detect language
43
  try:
 
67
 
68
  return language, transcription, text_file
69
 
70
+ # Telegram bot handlers
71
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to the sender with a one-line hint on how to use the bot."""
    greeting = "Hello! Send me an audio file, and I'll transcribe it for you."
    await update.message.reply_text(greeting)
73
+
74
async def handle_audio(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Transcribe an audio message sent to the bot and reply with the result.

    Downloads the attached audio to a temporary file, hands it to
    ``process_audio``, replies with the detected language and transcription,
    sends the transcription as a text document, and finally removes the
    temporary files, even when one of the steps fails.
    """
    import asyncio

    # effective_message also covers channel posts and edited messages, for
    # which update.message is None.
    message = update.effective_message
    if message is None or message.audio is None:
        return

    telegram_file = await message.audio.get_file()

    # Reserve a unique temp path instead of a predictable /tmp/<file_id> name.
    # The ".ogg" suffix is kept from the original implementation.
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
        audio_path = tmp.name
    text_file_path = None

    try:
        await telegram_file.download_to_drive(audio_path)

        # Transcription is blocking work; run it in the default executor so
        # the bot's event loop keeps serving other updates meanwhile.
        # NOTE(review): assumes process_audio returns
        # (language, transcription, text_file_path) - confirm against its
        # definition.
        loop = asyncio.get_running_loop()
        language, transcription, text_file_path = await loop.run_in_executor(
            None, process_audio, audio_path
        )

        # Send the transcription back to the user
        await message.reply_text(
            f"Detected Language: {language}\nTranscription: {transcription}"
        )

        # Send the transcription file
        with open(text_file_path, 'rb') as f:
            await message.reply_document(document=f, filename="transcription.txt")
    finally:
        # Clean up temporary files whether or not the steps above succeeded.
        for path in (audio_path, text_file_path):
            if path and os.path.exists(path):
                os.remove(path)
95
+
96
+ # Custom HTML for styled transcription display (for Gradio interface)
97
  transcription_html = """
98
  <div class="transcription-container" id="transcriptionContainer">
99
  <h2>Transcription Results</h2>
 
137
  # Gradio interface
138
  with gr.Blocks() as demo:
139
  gr.Markdown("# Audio Transcriptor")
140
+ gr.Markdown("Upload an audio file or record audio to transcribe the speech and detect the language. You can also interact with the bot via Telegram!")
141
 
142
  with gr.Row():
143
  audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
 
166
  outputs=[audio_file, audio_record]
167
  )
168
 
169
+ # Start the Telegram bot in a separate thread
170
def run_telegram_bot():
    """Build the Telegram application and poll for updates until stopped.

    Prints a hint and returns immediately when TELEGRAM_BOT_TOKEN is unset.
    May be called from the main thread or from a worker thread.
    """
    import asyncio
    import threading

    if not TELEGRAM_BOT_TOKEN:
        print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
        return

    polling_kwargs = {"allowed_updates": Update.ALL_TYPES}
    if threading.current_thread() is not threading.main_thread():
        # A worker thread has no event loop of its own, and signal handlers
        # can only be installed from the main thread, so give this thread a
        # loop and tell run_polling not to register stop signals.
        asyncio.set_event_loop(asyncio.new_event_loop())
        polling_kwargs["stop_signals"] = None

    application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()

    # Add handlers
    application.add_handler(CommandHandler("start", start))
    application.add_handler(MessageHandler(filters.AUDIO, handle_audio))

    # Start the bot
    print("Starting Telegram bot...")
    application.run_polling(**polling_kwargs)
184
+
185
+ # Launch Gradio app and Telegram bot
186
if __name__ == "__main__":
    import threading

    # Run the Telegram bot in a background thread. It is a daemon thread so
    # that the polling loop cannot keep the process alive once the Gradio
    # app (which owns the main thread) has shut down.
    bot_thread = threading.Thread(
        target=run_telegram_bot, name="telegram-bot", daemon=True
    )
    bot_thread.start()

    # Launch Gradio app
    demo.launch()