omar1232 committed on
Commit
b554eea
·
verified ·
1 Parent(s): 4a31f88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -42
app.py CHANGED
@@ -1,51 +1,44 @@
1
  import gradio as gr
 
2
  from pydub import AudioSegment
3
  import tempfile
4
  from langdetect import detect
5
  import os
6
  import asyncio
7
- import torch
8
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
9
  from telegram import Update
10
  from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
11
 
12
  # Telegram bot token (to be set via Hugging Face Space secrets)
13
  TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
14
 
15
- # Load the Hugging Face model and processor for transcription
16
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
17
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
18
-
19
  # Process audio and transcribe
20
  def process_audio(audio_input):
21
- # Convert all audio inputs to WAV format with 16kHz sample rate (required by wav2vec2)
 
 
22
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
23
  if isinstance(audio_input, tuple): # Recorded audio (sample_rate, numpy_array)
24
  sample_rate, audio_data = audio_input
25
- audio_segment = AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1)
26
  else: # Uploaded audio file (file path or Telegram file)
27
- audio_segment = AudioSegment.from_file(audio_input).set_channels(1)
28
-
29
- # Resample to 16kHz (required by wav2vec2)
30
- audio_segment = audio_segment.set_frame_rate(16000)
31
- audio_segment.export(temp_file.name, format="wav")
32
  audio_file_path = temp_file.name
33
 
34
- # Load the WAV file for transcription
35
- import soundfile as sf
36
- audio_data, sample_rate = sf.read(audio_file_path)
37
- assert sample_rate == 16000, "Sample rate must be 16kHz"
38
-
39
- # Preprocess audio for the model
40
- inputs = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt", padding=True)
41
-
42
- # Perform transcription
43
- with torch.no_grad():
44
- logits = model(inputs.input_values).logits
45
 
46
- # Decode the logits to text
47
- predicted_ids = torch.argmax(logits, dim=-1)
48
- transcription = processor.batch_decode(predicted_ids)[0]
 
 
 
 
 
 
49
 
50
  # Detect language
51
  try:
@@ -174,34 +167,31 @@ with gr.Blocks() as demo:
174
  outputs=[audio_file, audio_record]
175
  )
176
 
177
- # Start the Telegram bot in a separate thread with its own event loop
178
  def run_telegram_bot():
179
  if not TELEGRAM_BOT_TOKEN:
180
  print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
181
  return
182
 
183
- # Create a new event loop for this thread
184
- loop = asyncio.new_event_loop()
185
- asyncio.set_event_loop(loop)
186
-
187
  application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
188
 
189
  # Add handlers
190
  application.add_handler(CommandHandler("start", start))
191
  application.add_handler(MessageHandler(filters.AUDIO, handle_audio))
192
 
193
- # Start the bot
194
  print("Starting Telegram bot...")
195
- try:
196
- loop.run_until_complete(application.run_polling(allowed_updates=Update.ALL_TYPES))
197
- finally:
198
- loop.close()
 
199
 
200
  # Launch Gradio app and Telegram bot
201
  if __name__ == "__main__":
202
  import threading
203
- # Start the Telegram bot in a separate thread
204
- bot_thread = threading.Thread(target=run_telegram_bot)
205
- bot_thread.start()
206
- # Launch Gradio app
207
- demo.launch()
 
1
  import gradio as gr
2
+ import speech_recognition as sr
3
  from pydub import AudioSegment
4
  import tempfile
5
  from langdetect import detect
6
  import os
7
  import asyncio
 
 
8
  from telegram import Update
9
  from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
10
 
11
  # Telegram bot token (to be set via Hugging Face Space secrets)
12
  TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
13
 
 
 
 
 
14
  # Process audio and transcribe
15
  def process_audio(audio_input):
16
+ recognizer = sr.Recognizer()
17
+
18
+ # Convert all audio inputs to WAV format
19
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
20
  if isinstance(audio_input, tuple): # Recorded audio (sample_rate, numpy_array)
21
  sample_rate, audio_data = audio_input
22
+ AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
23
  else: # Uploaded audio file (file path or Telegram file)
24
+ audio = AudioSegment.from_file(audio_input)
25
+ audio = audio.set_channels(1) # Convert to mono for consistency
26
+ audio.export(temp_file.name, format="wav")
 
 
27
  audio_file_path = temp_file.name
28
 
29
+ # Debug: Check if the WAV file is valid
30
+ if os.path.getsize(audio_file_path) == 0:
31
+ raise ValueError("The converted WAV file is empty. The input audio may be corrupted.")
 
 
 
 
 
 
 
 
32
 
33
+ # Transcribe the WAV file using pocketsphinx (offline)
34
+ with sr.AudioFile(audio_file_path) as source:
35
+ audio = recognizer.record(source)
36
+ try:
37
+ transcription = recognizer.recognize_sphinx(audio) # Use pocketsphinx for offline transcription
38
+ except sr.UnknownValueError:
39
+ transcription = "Could not understand the audio."
40
+ except sr.RequestError as e:
41
+ transcription = f"Transcription failed: {str(e)}"
42
 
43
  # Detect language
44
  try:
 
167
  outputs=[audio_file, audio_record]
168
  )
169
 
170
+ # Start the Telegram bot in the main thread
171
  def run_telegram_bot():
172
  if not TELEGRAM_BOT_TOKEN:
173
  print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
174
  return
175
 
 
 
 
 
176
  application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
177
 
178
  # Add handlers
179
  application.add_handler(CommandHandler("start", start))
180
  application.add_handler(MessageHandler(filters.AUDIO, handle_audio))
181
 
182
+ # Start the bot in the main thread
183
  print("Starting Telegram bot...")
184
+ asyncio.run(application.run_polling(allowed_updates=Update.ALL_TYPES))
185
+
186
+ # Launch Gradio app in a separate thread
187
def run_gradio():
    """Launch the Gradio ``demo`` app; used as the target of a thread."""
    demo.launch()
189
 
190
  # Launch Gradio app and Telegram bot
191
  if __name__ == "__main__":
192
  import threading
193
+ # Start Gradio in a separate thread
194
+ gradio_thread = threading.Thread(target=run_gradio)
195
+ gradio_thread.start()
196
+ # Run Telegram bot in the main thread
197
+ run_telegram_bot()