|
|
import os |
|
|
import tempfile |
|
|
import time |
|
|
import json |
|
|
import threading |
|
|
import gc |
|
|
from pathlib import Path |
|
|
import uuid |
|
|
import logging |
|
|
from datetime import datetime, timedelta |
|
|
import asyncio |
|
|
from concurrent.futures import ThreadPoolExecutor |
|
|
|
|
|
import torch |
|
|
import yt_dlp as youtube_dlp |
|
|
from flask import Flask, request, jsonify |
|
|
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration |
|
|
from transformers.pipelines.audio_utils import ffmpeg_read |
|
|
import ffmpeg |
|
|
import librosa |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# --- Service configuration --------------------------------------------------
MODEL_NAME = "openai/whisper-large-v3"        # Hugging Face model id to load
BATCH_SIZE = 16                               # pipeline batch size per call
FILE_LIMIT_MB = 1000                          # upload size cap (megabytes)
YT_LENGTH_LIMIT_S = 3600                      # max YouTube duration (1 hour)
MAX_FILE_SIZE = FILE_LIMIT_MB * 1024 * 1024   # upload size cap in bytes
MODEL_TIMEOUT_MINUTES = 60                    # idle minutes before model unload
CHUNK_LENGTH = 30                             # seconds per ASR chunk
MAX_WORKERS = 4                               # manager's thread-pool size

# GPU index 0 when CUDA is available, otherwise the string "cpu"
# (both forms are accepted by transformers' pipeline `device` argument).
device = 0 if torch.cuda.is_available() else "cpu"
logger.info(f"Using device: {device}")

# Enable cuDNN autotuning and TF32 matmuls for faster GPU inference
# (TF32 trades a little precision for speed on Ampere+ GPUs).
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
|
|
|
|
|
class OptimizedModelManager:
    """Lazy-loading manager for the shared Whisper ASR pipeline.

    The pipeline is loaded on first use; every access refreshes a
    last-used timestamp, and a background timer unloads the model after
    MODEL_TIMEOUT_MINUTES of inactivity to free GPU/host memory.
    """

    def __init__(self):
        self.pipe = None           # transformers ASR pipeline, None while unloaded
        self.processor = None      # WhisperProcessor (tokenizer + feature extractor)
        self.model = None          # WhisperForConditionalGeneration weights
        self.last_used = None      # datetime of the most recent access
        # BUG FIX: RLock instead of Lock.  check_and_cleanup() calls
        # cleanup_model() while already holding this lock, and both methods
        # acquire it; with a non-reentrant Lock the idle-timeout cleanup
        # path deadlocked.
        self.model_lock = threading.RLock()
        self.cleanup_timer = None  # threading.Timer for idle cleanup
        self.is_loading = False    # surfaced by the /model/status endpoint
        self.thread_pool = ThreadPoolExecutor(max_workers=MAX_WORKERS)

    def load_model(self):
        """Load the Whisper model and pipeline (idempotent, thread-safe)."""
        with self.model_lock:
            # Already loaded: just refresh the idle timestamp.
            if self.pipe is not None:
                self.last_used = datetime.now()
                return self.pipe

            # Defensive: with the lock held for the whole load this wait
            # should not normally be observable from another thread.
            if self.is_loading:
                while self.is_loading:
                    time.sleep(0.5)
                return self.pipe

            try:
                self.is_loading = True
                logger.info("Loading optimized Whisper model...")

                self.processor = WhisperProcessor.from_pretrained(MODEL_NAME)
                self.model = WhisperForConditionalGeneration.from_pretrained(
                    MODEL_NAME,
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    device_map="auto" if torch.cuda.is_available() else None,
                    use_cache=True
                ).to(device)
                # NOTE(review): device_map="auto" combined with an explicit
                # .to(device) can conflict on multi-GPU hosts — confirm on
                # the target deployment.

                if torch.cuda.is_available():
                    # Redundant with torch_dtype=float16 above, but harmless.
                    self.model.half()

                self.pipe = pipeline(
                    task="automatic-speech-recognition",
                    model=self.model,
                    tokenizer=self.processor.tokenizer,
                    feature_extractor=self.processor.feature_extractor,
                    chunk_length_s=CHUNK_LENGTH,
                    device=device,
                    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    model_kwargs={
                        # NOTE(review): passing this for an already-instantiated
                        # model is likely a no-op — verify against the installed
                        # transformers version.
                        "use_flash_attention_2": True if hasattr(self.model.config, 'use_flash_attention_2') else False
                    }
                )

                self.last_used = datetime.now()
                self.start_cleanup_timer()
                logger.info("Optimized Whisper model loaded successfully")

            except Exception as e:
                logger.error(f"Error loading Whisper model: {e}")
                self.pipe = None
                raise
            finally:
                self.is_loading = False

            return self.pipe

    def get_model(self):
        """Return the pipeline, loading it on first use."""
        if self.pipe is None:
            return self.load_model()

        self.last_used = datetime.now()
        return self.pipe

    def cleanup_model(self):
        """Unload the model and release GPU/host memory."""
        with self.model_lock:
            if self.pipe is not None:
                logger.info("Cleaning up model from memory...")
                del self.pipe
                del self.model
                del self.processor
                self.pipe = None
                self.model = None
                self.processor = None

                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    torch.cuda.synchronize()

                gc.collect()
                logger.info("Model cleanup completed")

            if self.cleanup_timer:
                self.cleanup_timer.cancel()
                self.cleanup_timer = None

    def start_cleanup_timer(self):
        """(Re)arm the idle-timeout cleanup timer."""
        if self.cleanup_timer:
            self.cleanup_timer.cancel()

        self.cleanup_timer = threading.Timer(
            MODEL_TIMEOUT_MINUTES * 60,
            self.check_and_cleanup
        )
        self.cleanup_timer.start()

    def check_and_cleanup(self):
        """Timer callback: unload when idle past the timeout, else re-arm."""
        with self.model_lock:
            if self.last_used and self.pipe:
                time_diff = datetime.now() - self.last_used
                if time_diff > timedelta(minutes=MODEL_TIMEOUT_MINUTES):
                    # Re-acquires model_lock; safe only because it is an RLock.
                    self.cleanup_model()
                else:
                    # Used recently: re-arm for the remaining idle budget.
                    remaining_time = MODEL_TIMEOUT_MINUTES * 60 - time_diff.total_seconds()
                    self.cleanup_timer = threading.Timer(remaining_time, self.check_and_cleanup)
                    self.cleanup_timer.start()
|
|
|
|
|
|
|
|
|
|
|
# Single process-wide model manager shared by all request handlers.
model_manager = OptimizedModelManager()
|
|
|
|
|
|
|
|
# Whisper language codes -> human-readable names; used to validate the
# `language` request parameter before it is passed to generation.
SUPPORTED_LANGUAGES = {
    "af": "afrikaans", "am": "amharic", "ar": "arabic", "as": "assamese", "az": "azerbaijani",
    "ba": "bashkir", "be": "belarusian", "bg": "bulgarian", "bn": "bengali", "bo": "tibetan",
    "br": "breton", "bs": "bosnian", "ca": "catalan", "cs": "czech", "cy": "welsh",
    "da": "danish", "de": "german", "el": "greek", "en": "english", "es": "spanish",
    "et": "estonian", "eu": "basque", "fa": "persian", "fi": "finnish", "fo": "faroese",
    "fr": "french", "gl": "galician", "gu": "gujarati", "ha": "hausa", "haw": "hawaiian",
    "he": "hebrew", "hi": "hindi", "hr": "croatian", "ht": "haitian creole", "hu": "hungarian",
    "hy": "armenian", "id": "indonesian", "is": "icelandic", "it": "italian", "ja": "japanese",
    "jw": "javanese", "ka": "georgian", "kk": "kazakh", "km": "khmer", "kn": "kannada",
    "ko": "korean", "la": "latin", "lb": "luxembourgish", "ln": "lingala", "lo": "lao",
    "lt": "lithuanian", "lv": "latvian", "mg": "malagasy", "mi": "maori", "mk": "macedonian",
    "ml": "malayalam", "mn": "mongolian", "mr": "marathi", "ms": "malay", "mt": "maltese",
    "my": "myanmar", "ne": "nepali", "nl": "dutch", "nn": "nynorsk", "no": "norwegian",
    "oc": "occitan", "pa": "punjabi", "pl": "polish", "ps": "pashto", "pt": "portuguese",
    "ro": "romanian", "ru": "russian", "sa": "sanskrit", "sd": "sindhi", "si": "sinhala",
    "sk": "slovak", "sl": "slovenian", "sn": "shona", "so": "somali", "sq": "albanian",
    "sr": "serbian", "su": "sundanese", "sv": "swedish", "sw": "swahili", "ta": "tamil",
    "te": "telugu", "tg": "tajik", "th": "thai", "tk": "turkmen", "tl": "tagalog",
    "tr": "turkish", "tt": "tatar", "uk": "ukrainian", "ur": "urdu", "uz": "uzbek",
    "vi": "vietnamese", "yi": "yiddish", "yo": "yoruba", "zh": "chinese"
}

# File extensions accepted for uploads / URL downloads.  Video formats are
# converted to WAV before transcription.
SUPPORTED_VIDEO_FORMATS = ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.3gp']
SUPPORTED_AUDIO_FORMATS = ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma']
|
|
|
|
|
def fast_audio_preprocessing(file_path):
    """Load an audio file as a mono 16 kHz float array.

    Tries librosa first (with peak normalization and silence trimming);
    falls back to raw ffmpeg decoding if librosa fails.

    Args:
        file_path: path of the audio file on disk.

    Returns:
        (audio, sample_rate) where audio is a 1-D float array and
        sample_rate is 16000.

    Raises:
        Exception: when both librosa and the ffmpeg fallback fail.
    """
    try:
        # Best effort: disable librosa's cache, which only adds overhead
        # for one-shot decoding.  Not all librosa versions expose this API,
        # hence the guard.
        try:
            import librosa.cache
            librosa.cache.clear()
            librosa.cache.set_cache(None)
            logger.info("Librosa cache disabled successfully")
        except (ImportError, AttributeError) as cache_error:
            logger.info(f"Librosa cache module not available: {cache_error}")

        # Resample to Whisper's expected 16 kHz mono.
        audio, sr = librosa.load(file_path, sr=16000, mono=True)

        # Peak-normalize, then trim leading/trailing silence (20 dB threshold).
        audio = librosa.util.normalize(audio)
        audio, _ = librosa.effects.trim(audio, top_db=20)

        return audio, sr
    except Exception as e:
        logger.error(f"Error in fast audio preprocessing: {e}")

        # Fallback: decode the raw bytes with ffmpeg at 16 kHz.
        try:
            with open(file_path, "rb") as f:
                inputs = f.read()
            return ffmpeg_read(inputs, 16000), 16000
        except Exception as ffmpeg_error:
            logger.error(f"FFmpeg fallback also failed: {ffmpeg_error}")
            # BUG FIX: chain the cause so the underlying ffmpeg failure is
            # visible in tracebacks instead of being silently discarded.
            raise Exception("Both librosa and ffmpeg audio processing failed") from ffmpeg_error
|
|
|
|
|
def extract_audio_from_video_fast(video_path, output_path):
    """Extract the audio track of a video as 16 kHz mono 16-bit PCM WAV.

    Returns True on success, False on failure (the error is logged).
    """
    try:
        stream = ffmpeg.input(video_path)
        stream = stream.output(
            output_path,
            acodec='pcm_s16le',  # uncompressed 16-bit PCM
            ac=1,                # mono
            ar=16000,            # Whisper's expected sample rate
            **{'threads': '0', 'preset': 'ultrafast'}
        )
        stream = stream.overwrite_output()
        stream.run(quiet=True, capture_stdout=True)
    except Exception as e:
        logger.error(f"Error in fast audio extraction: {e}")
        return False
    return True
|
|
|
|
|
def parallel_chunk_processing(audio_chunks, pipe, task, language):
    """Run the ASR pipeline over pre-cut audio chunks and re-anchor timestamps.

    NOTE: despite the name, chunks are processed sequentially in a loop.
    Each element of audio_chunks is a (samples, start_time_seconds) pair;
    per-chunk timestamps are shifted by start_time so they are absolute
    within the original recording.  A failed chunk contributes an empty
    result rather than aborting the batch.
    """
    results = []

    for samples, offset in audio_chunks:
        try:
            # Greedy decoding (no sampling, single beam) for speed.
            generate_kwargs = {
                "task": task,
                "do_sample": False,
                "num_beams": 1,
                "use_cache": True,
            }
            if language != "auto" and language in SUPPORTED_LANGUAGES:
                generate_kwargs["language"] = f"<|{language}|>"

            result = pipe(
                {"array": samples, "sampling_rate": 16000},
                batch_size=BATCH_SIZE,
                generate_kwargs=generate_kwargs,
                return_timestamps=True
            )

            # Shift each cue into absolute time; substitute the chunk span
            # when the model returned open-ended/missing timestamps.
            for cue in result.get('chunks') or []:
                ts = cue.get('timestamp')
                if ts and ts[0] is not None and ts[1] is not None:
                    cue['timestamp'] = (ts[0] + offset, ts[1] + offset)
                else:
                    cue['timestamp'] = (offset, offset + len(samples) / 16000)

            results.append(result)
        except Exception as e:
            logger.error(f"Error processing chunk: {e}")
            results.append({"text": "", "chunks": []})

    return results
|
|
|
|
|
def chunks_to_srt(chunks):
    """Convert Whisper timestamped chunks into an SRT subtitle string.

    Args:
        chunks: list of dicts with 'timestamp' (start, end) in seconds and
            'text'.  Chunks with missing/None timestamps or empty text are
            skipped.

    Returns:
        The SRT document as a string ("" when nothing is usable).
    """
    if not chunks:
        return ""

    def _fmt(seconds):
        # SRT time format: HH:MM:SS,mmm
        return "{:02}:{:02}:{:02},{:03}".format(
            int(seconds // 3600),
            int((seconds % 3600) // 60),
            int(seconds % 60),
            int((seconds % 1) * 1000),
        )

    entries = []
    # BUG FIX: number only the cues actually emitted.  The original used the
    # enumerate index, leaving gaps in the SRT numbering whenever a chunk
    # was skipped — SRT entry numbers must be consecutive.
    index = 1
    for chunk in chunks:
        if not isinstance(chunk, dict) or not chunk.get('timestamp'):
            continue

        try:
            start_time, end_time = chunk['timestamp']
            if start_time is None or end_time is None:
                continue

            text = chunk.get('text', '').strip()
            if text:
                # join-based building avoids quadratic += concatenation.
                entries.append(f"{index}\n{_fmt(start_time)} --> {_fmt(end_time)}\n{text}\n\n")
                index += 1
        except (ValueError, TypeError, KeyError) as e:
            logger.warning(f"Error processing chunk {index}: {e}")
            continue

    return "".join(entries)
|
|
|
|
|
def download_youtube_audio_fast(yt_url, output_path):
    """Download the audio track of a YouTube video to output_path.

    Args:
        yt_url: the YouTube video URL.
        output_path: yt-dlp output template (may contain %(ext)s).

    Raises:
        Exception: when metadata extraction fails, the video exceeds
            YT_LENGTH_LIMIT_S, or the download fails.
    """
    info_loader = youtube_dlp.YoutubeDL({'quiet': True})

    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dlp.utils.DownloadError as err:
        raise Exception(f"YouTube extraction error: {str(err)}") from err

    # Enforce the duration cap before downloading anything.
    file_length_s = info.get("duration", 0)
    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%H:%M:%S", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%H:%M:%S", time.gmtime(file_length_s))
        raise Exception(f"Video too long. Maximum: {yt_length_limit_hms}, got: {file_length_hms}")

    ydl_opts = {
        "outtmpl": output_path,
        "format": "bestaudio[ext=m4a]/bestaudio/best",
        "extractaudio": True,
        "audioformat": "wav",
        "audioquality": "96K",
        "quiet": True,
        "no_warnings": True,
    }

    with youtube_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        # BUG FIX: YoutubeDL.download reports failures as DownloadError
        # (extractor errors are wrapped into it), so catching only
        # ExtractorError let real download failures escape as raw yt-dlp
        # exceptions.
        except (youtube_dlp.utils.DownloadError, youtube_dlp.utils.ExtractorError) as err:
            raise Exception(f"YouTube download error: {str(err)}") from err
|
|
|
|
|
def process_audio_file_optimized(file_path, task="transcribe", language="auto", return_timestamps=False):
    """Transcribe or translate one audio file with the shared Whisper pipeline.

    Args:
        file_path: path to a decodable audio file on disk.
        task: "transcribe" or "translate" (forwarded to generation).
        language: code from SUPPORTED_LANGUAGES, or "auto" to detect.
        return_timestamps: when True, also return per-chunk timestamps
            and an SRT rendering.

    Returns:
        {"text": ...} or, with timestamps,
        {"text": ..., "chunks": [...], "srt": "..."}.

    Raises:
        Exception: wraps any underlying processing failure.
    """
    try:
        start_time = time.time()
        pipe = model_manager.get_model()

        logger.info(f"Starting audio processing for: {file_path}")

        audio, sr = fast_audio_preprocessing(file_path)

        # BUG FIX: validate BEFORE use.  The original logged len(audio)
        # first, so a None result raised TypeError before this clearer
        # error could ever be reported.
        if audio is None:
            raise Exception("Audio preprocessing returned None")

        logger.info(f"Audio loaded: {len(audio)} samples at {sr}Hz")

        inputs = {"array": audio, "sampling_rate": sr}

        # Greedy decoding (no sampling, single beam) for speed.
        generate_kwargs = {
            "task": task,
            "do_sample": False,
            "num_beams": 1,
            "use_cache": True,
        }

        if language != "auto" and language in SUPPORTED_LANGUAGES:
            generate_kwargs["language"] = f"<|{language}|>"

        result = pipe(
            inputs,
            batch_size=BATCH_SIZE,
            generate_kwargs=generate_kwargs,
            return_timestamps=return_timestamps
        )

        processing_time = time.time() - start_time
        logger.info(f"Audio processing completed in {processing_time:.2f} seconds")

        if return_timestamps:
            # Drop chunks whose timestamps came back None/open-ended so
            # SRT generation only sees well-formed cues.
            valid_chunks = []
            if result.get('chunks'):
                for chunk in result['chunks']:
                    if chunk.get('timestamp') and chunk['timestamp'][0] is not None and chunk['timestamp'][1] is not None:
                        valid_chunks.append(chunk)
                    else:
                        logger.warning("Skipping chunk with invalid timestamp")

            return {
                "text": result['text'],
                "chunks": valid_chunks,
                "srt": chunks_to_srt(valid_chunks)
            }
        else:
            return {"text": result['text']}

    except Exception as e:
        logger.error(f"Error processing audio: {e}")
        # Chain the cause so the original traceback survives the re-wrap.
        raise Exception(f"Audio processing error: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    payload = {
        "status": "healthy",
        "model": MODEL_NAME,
        "device": str(device),
        "model_status": "loaded" if model_manager.pipe is not None else "not_loaded",
        "model_timeout_minutes": MODEL_TIMEOUT_MINUTES,
        "optimization": {
            "fp16": torch.cuda.is_available(),
            "batch_size": BATCH_SIZE,
            "chunk_length": CHUNK_LENGTH,
            "max_workers": MAX_WORKERS,
        },
        "supported_languages": list(SUPPORTED_LANGUAGES.keys()),
    }
    return jsonify(payload)
|
|
|
|
|
@app.route('/model/status', methods=['GET'])
def model_status():
    """Report whether the model is loaded and when it was last used."""
    last = model_manager.last_used
    return jsonify({
        "model_loaded": model_manager.pipe is not None,
        "last_used": last.isoformat() if last else None,
        "timeout_minutes": MODEL_TIMEOUT_MINUTES,
        "is_loading": model_manager.is_loading,
        "optimization_enabled": True
    })
|
|
|
|
|
@app.route('/model/preload', methods=['POST'])
def preload_model():
    """Warm up the model so the first transcription request is fast."""
    try:
        started = time.time()
        model_manager.get_model()
        elapsed = time.time() - started
        body = {
            "success": True,
            "message": "Optimized model preloaded successfully",
            "load_time": f"{elapsed:.2f} seconds",
        }
        return jsonify(body)
    except Exception as e:
        return jsonify({"success": False, "error": str(e)}), 500
|
|
|
|
|
@app.route('/model/unload', methods=['POST'])
def unload_model():
    """Manually unload the model from memory."""
    model_manager.cleanup_model()
    response = {
        "success": True,
        "message": "Model unloaded from memory",
    }
    return jsonify(response)
|
|
|
|
|
@app.route('/languages', methods=['GET'])
def get_supported_languages():
    """Get list of supported languages"""
    response = {
        "supported_languages": SUPPORTED_LANGUAGES,
        "total_count": len(SUPPORTED_LANGUAGES),
    }
    return jsonify(response)
|
|
|
|
|
@app.route('/transcribe', methods=['POST'])
def transcribe_endpoint():
    """Main transcription endpoint - optimized"""
    # Accepts one of three input sources (multipart form): an uploaded
    # 'file', a 'youtube_url', or a direct 'audio_url'.  Optional fields:
    # task ('transcribe'|'translate'), language (code or 'auto'),
    # return_timestamps ('true'|'false').
    try:
        start_time = time.time()

        # Request options with defaults.
        task = request.form.get('task', 'transcribe')
        language = request.form.get('language', 'auto')
        return_timestamps = request.form.get('return_timestamps', 'false').lower() == 'true'

        # Validate options before doing any file work.
        if task not in ['transcribe', 'translate']:
            return jsonify({"error": "Task must be 'transcribe' or 'translate'"}), 400

        if language != 'auto' and language not in SUPPORTED_LANGUAGES:
            return jsonify({"error": f"Language '{language}' not supported"}), 400

        # All intermediate files live in this directory and are deleted
        # automatically when the request finishes.
        with tempfile.TemporaryDirectory() as temp_dir:
            # --- Input source 1: direct file upload ---
            if 'file' in request.files:
                file = request.files['file']
                if file.filename == '':
                    return jsonify({"error": "No file selected"}), 400

                # Determine upload size by seeking to the end of the stream.
                file.seek(0, os.SEEK_END)
                file_size = file.tell()
                file.seek(0)

                if file_size > MAX_FILE_SIZE:
                    return jsonify({"error": f"File too large. Maximum size: {FILE_LIMIT_MB}MB"}), 400

                # Persist the upload under its original extension.
                file_extension = Path(file.filename).suffix.lower()
                temp_file_path = os.path.join(temp_dir, f"upload{file_extension}")
                file.save(temp_file_path)

                # Videos get their audio track extracted to WAV first.
                if file_extension in SUPPORTED_VIDEO_FORMATS:
                    audio_path = os.path.join(temp_dir, "extracted_audio.wav")
                    if not extract_audio_from_video_fast(temp_file_path, audio_path):
                        return jsonify({"error": "Failed to extract audio from video"}), 500
                    temp_file_path = audio_path
                elif file_extension not in SUPPORTED_AUDIO_FORMATS:
                    return jsonify({"error": f"Unsupported file format: {file_extension}"}), 400

            # --- Input source 2: YouTube URL ---
            elif 'youtube_url' in request.form:
                youtube_url = request.form.get('youtube_url')
                if not youtube_url:
                    return jsonify({"error": "YouTube URL is required"}), 400

                # yt-dlp substitutes the real extension for %(ext)s.
                temp_file_path = os.path.join(temp_dir, "youtube_audio.%(ext)s")
                try:
                    download_youtube_audio_fast(youtube_url, temp_file_path)

                    # Locate whichever file yt-dlp actually produced.
                    # NOTE(review): `file` shadows the upload variable from the
                    # first branch; harmless here but worth renaming.
                    for file in os.listdir(temp_dir):
                        if file.startswith("youtube_audio"):
                            temp_file_path = os.path.join(temp_dir, file)
                            break
                except Exception as e:
                    return jsonify({"error": str(e)}), 400

            # --- Input source 3: direct audio/video URL ---
            elif 'audio_url' in request.form:
                audio_url = request.form.get('audio_url')
                if not audio_url:
                    return jsonify({"error": "Audio URL is required"}), 400

                import requests
                try:
                    # Stream the download to disk in 8 KiB chunks.
                    response = requests.get(audio_url, stream=True, timeout=30)
                    response.raise_for_status()

                    # Guess an extension from the URL, falling back to the
                    # Content-Type header, then to .mp3.
                    file_extension = Path(audio_url).suffix.lower()
                    if not file_extension:
                        content_type = response.headers.get('content-type', '')
                        if 'audio' in content_type:
                            file_extension = '.mp3'
                        elif 'video' in content_type:
                            file_extension = '.mp4'
                        else:
                            file_extension = '.mp3'

                    temp_file_path = os.path.join(temp_dir, f"download{file_extension}")

                    with open(temp_file_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)

                    # As with uploads: extract audio from video files.
                    # NOTE(review): unlike the upload branch, this path does
                    # not enforce MAX_FILE_SIZE — confirm whether that is
                    # intentional.
                    if file_extension in SUPPORTED_VIDEO_FORMATS:
                        audio_path = os.path.join(temp_dir, "extracted_audio.wav")
                        if not extract_audio_from_video_fast(temp_file_path, audio_path):
                            return jsonify({"error": "Failed to extract audio from video"}), 500
                        temp_file_path = audio_path

                except requests.RequestException as e:
                    return jsonify({"error": f"Failed to download file: {str(e)}"}), 400
            else:
                return jsonify({"error": "No input provided. Use 'file', 'youtube_url', or 'audio_url'"}), 400

            # Run the actual transcription while temp_dir still exists.
            result = process_audio_file_optimized(temp_file_path, task, language, return_timestamps)

            total_time = time.time() - start_time

            # Merge the transcription result into the response envelope.
            return jsonify({
                "success": True,
                "task": task,
                "language": language,
                "return_timestamps": return_timestamps,
                "processing_time": f"{total_time:.2f} seconds",
                **result
            })

    except Exception as e:
        logger.error(f"Transcription error: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    try:
        # threaded=True lets Flask serve concurrent requests; debug disabled
        # for production use.
        app.run(host='0.0.0.0', port=7860, debug=False, threaded=True)
    finally:
        # Release the model (and any GPU memory) on shutdown.
        model_manager.cleanup_model()