Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,51 +1,44 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
from pydub import AudioSegment
|
| 3 |
import tempfile
|
| 4 |
from langdetect import detect
|
| 5 |
import os
|
| 6 |
import asyncio
|
| 7 |
-
import torch
|
| 8 |
-
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
| 9 |
from telegram import Update
|
| 10 |
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
|
| 11 |
|
| 12 |
# Telegram bot token (to be set via Hugging Face Space secrets)
|
| 13 |
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
|
| 14 |
|
| 15 |
-
# Load the Hugging Face model and processor for transcription
|
| 16 |
-
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
|
| 17 |
-
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
|
| 18 |
-
|
| 19 |
# Process audio and transcribe
|
| 20 |
def process_audio(audio_input):
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
| 23 |
if isinstance(audio_input, tuple): # Recorded audio (sample_rate, numpy_array)
|
| 24 |
sample_rate, audio_data = audio_input
|
| 25 |
-
|
| 26 |
else: # Uploaded audio file (file path or Telegram file)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
audio_segment = audio_segment.set_frame_rate(16000)
|
| 31 |
-
audio_segment.export(temp_file.name, format="wav")
|
| 32 |
audio_file_path = temp_file.name
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
assert sample_rate == 16000, "Sample rate must be 16kHz"
|
| 38 |
-
|
| 39 |
-
# Preprocess audio for the model
|
| 40 |
-
inputs = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt", padding=True)
|
| 41 |
-
|
| 42 |
-
# Perform transcription
|
| 43 |
-
with torch.no_grad():
|
| 44 |
-
logits = model(inputs.input_values).logits
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# Detect language
|
| 51 |
try:
|
|
@@ -174,34 +167,31 @@ with gr.Blocks() as demo:
|
|
| 174 |
outputs=[audio_file, audio_record]
|
| 175 |
)
|
| 176 |
|
| 177 |
-
# Start the Telegram bot in
|
| 178 |
def run_telegram_bot():
|
| 179 |
if not TELEGRAM_BOT_TOKEN:
|
| 180 |
print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
|
| 181 |
return
|
| 182 |
|
| 183 |
-
# Create a new event loop for this thread
|
| 184 |
-
loop = asyncio.new_event_loop()
|
| 185 |
-
asyncio.set_event_loop(loop)
|
| 186 |
-
|
| 187 |
application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
|
| 188 |
|
| 189 |
# Add handlers
|
| 190 |
application.add_handler(CommandHandler("start", start))
|
| 191 |
application.add_handler(MessageHandler(filters.AUDIO, handle_audio))
|
| 192 |
|
| 193 |
-
# Start the bot
|
| 194 |
print("Starting Telegram bot...")
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
| 199 |
|
| 200 |
# Launch Gradio app and Telegram bot
|
| 201 |
if __name__ == "__main__":
|
| 202 |
import threading
|
| 203 |
-
# Start
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
#
|
| 207 |
-
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import speech_recognition as sr
|
| 3 |
from pydub import AudioSegment
|
| 4 |
import tempfile
|
| 5 |
from langdetect import detect
|
| 6 |
import os
|
| 7 |
import asyncio
|
|
|
|
|
|
|
| 8 |
from telegram import Update
|
| 9 |
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
|
| 10 |
|
| 11 |
# Telegram bot token (to be set via Hugging Face Space secrets)
|
| 12 |
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Process audio and transcribe
|
| 15 |
def process_audio(audio_input):
|
| 16 |
+
recognizer = sr.Recognizer()
|
| 17 |
+
|
| 18 |
+
# Convert all audio inputs to WAV format
|
| 19 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
| 20 |
if isinstance(audio_input, tuple): # Recorded audio (sample_rate, numpy_array)
|
| 21 |
sample_rate, audio_data = audio_input
|
| 22 |
+
AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
|
| 23 |
else: # Uploaded audio file (file path or Telegram file)
|
| 24 |
+
audio = AudioSegment.from_file(audio_input)
|
| 25 |
+
audio = audio.set_channels(1) # Convert to mono for consistency
|
| 26 |
+
audio.export(temp_file.name, format="wav")
|
|
|
|
|
|
|
| 27 |
audio_file_path = temp_file.name
|
| 28 |
|
| 29 |
+
# Debug: Check if the WAV file is valid
|
| 30 |
+
if os.path.getsize(audio_file_path) == 0:
|
| 31 |
+
raise ValueError("The converted WAV file is empty. The input audio may be corrupted.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
# Transcribe the WAV file using pocketsphinx (offline)
|
| 34 |
+
with sr.AudioFile(audio_file_path) as source:
|
| 35 |
+
audio = recognizer.record(source)
|
| 36 |
+
try:
|
| 37 |
+
transcription = recognizer.recognize_sphinx(audio) # Use pocketsphinx for offline transcription
|
| 38 |
+
except sr.UnknownValueError:
|
| 39 |
+
transcription = "Could not understand the audio."
|
| 40 |
+
except sr.RequestError as e:
|
| 41 |
+
transcription = f"Transcription failed: {str(e)}"
|
| 42 |
|
| 43 |
# Detect language
|
| 44 |
try:
|
|
|
|
| 167 |
outputs=[audio_file, audio_record]
|
| 168 |
)
|
| 169 |
|
| 170 |
+
# Start the Telegram bot in the main thread
|
| 171 |
def run_telegram_bot():
    """Build the Telegram bot application and poll for updates until stopped.

    Returns immediately, after printing a hint, when ``TELEGRAM_BOT_TOKEN``
    is unset. Otherwise registers the ``/start`` command handler and the
    audio-message handler, then blocks inside ``Application.run_polling``.
    """
    if not TELEGRAM_BOT_TOKEN:
        print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
        return

    application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()

    # Add handlers
    application.add_handler(CommandHandler("start", start))
    application.add_handler(MessageHandler(filters.AUDIO, handle_audio))

    # Start the bot
    print("Starting Telegram bot...")
    # run_polling() is a blocking, synchronous call that drives its own event
    # loop and returns None. Wrapping it in asyncio.run() would raise
    # "ValueError: a coroutine was expected" as soon as polling stops.
    application.run_polling(allowed_updates=Update.ALL_TYPES)
|
| 185 |
+
|
| 186 |
+
# Launch Gradio app in a separate thread
|
| 187 |
+
def run_gradio():
    """Launch the Gradio ``demo`` interface (thread target for the web UI)."""
    demo.launch()
|
| 189 |
|
| 190 |
# Launch Gradio app and Telegram bot
|
| 191 |
if __name__ == "__main__":
    import threading

    # Serve the Gradio UI from a background thread so the main thread
    # remains available for the Telegram bot.
    gradio_thread = threading.Thread(target=run_gradio)
    gradio_thread.start()

    # Run Telegram bot in the main thread
    run_telegram_bot()
|