Spaces:

danicor
/

WL3

Sleeping

App Files Files Community

danicor committed on Sep 5, 2025

Commit

61872a3

verified ·

1 Parent(s): 9f0e614

Upload app.py

Browse files

Files changed (1) hide show

app.py +406 -0

app.py ADDED Viewed

	@@ -0,0 +1,406 @@

+import os
+import tempfile
+import time
+import json
+from pathlib import Path
+import uuid
+import logging
+import torch
+import yt_dlp as youtube_dl
+from flask import Flask, request, jsonify
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+import ffmpeg
+# Configure root logging at INFO level and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = Flask(__name__)
+# Configuration
+MODEL_NAME = "openai/whisper-large-v3"
+# Forwarded as `batch_size` on every pipeline call in process_audio_file.
+BATCH_SIZE = 8
+# Upload size cap in megabytes; MAX_FILE_SIZE below is the same limit in bytes.
+FILE_LIMIT_MB = 1000
+YT_LENGTH_LIMIT_S = 3600  # 1 hour limit for YouTube
+MAX_FILE_SIZE = FILE_LIMIT_MB * 1024 * 1024  # Convert to bytes
+# Device configuration: device index 0 when CUDA is available, otherwise "cpu".
+device = 0 if torch.cuda.is_available() else "cpu"
+logger.info(f"Using device: {device}")
+# Initialize Whisper pipeline once, at import time. A load failure is logged
+# and re-raised, so the module never finishes importing without a model.
+try:
+    pipe = pipeline(
+        task="automatic-speech-recognition",
+        model=MODEL_NAME,
+        chunk_length_s=30,
+        device=device,
+    )
+    logger.info("Whisper model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading Whisper model: {e}")
+    raise
+# Supported languages for Whisper (99 languages).
+# Keys are the codes accepted in the `language` form field; values are the
+# English language names returned by the /languages endpoint.
+SUPPORTED_LANGUAGES = {
+    "af": "afrikaans", "am": "amharic", "ar": "arabic", "as": "assamese", "az": "azerbaijani",
+    "ba": "bashkir", "be": "belarusian", "bg": "bulgarian", "bn": "bengali", "bo": "tibetan",
+    "br": "breton", "bs": "bosnian", "ca": "catalan", "cs": "czech", "cy": "welsh",
+    "da": "danish", "de": "german", "el": "greek", "en": "english", "es": "spanish",
+    "et": "estonian", "eu": "basque", "fa": "persian", "fi": "finnish", "fo": "faroese",
+    "fr": "french", "gl": "galician", "gu": "gujarati", "ha": "hausa", "haw": "hawaiian",
+    "he": "hebrew", "hi": "hindi", "hr": "croatian", "ht": "haitian creole", "hu": "hungarian",
+    "hy": "armenian", "id": "indonesian", "is": "icelandic", "it": "italian", "ja": "japanese",
+    "jw": "javanese", "ka": "georgian", "kk": "kazakh", "km": "khmer", "kn": "kannada",
+    "ko": "korean", "la": "latin", "lb": "luxembourgish", "ln": "lingala", "lo": "lao",
+    "lt": "lithuanian", "lv": "latvian", "mg": "malagasy", "mi": "maori", "mk": "macedonian",
+    "ml": "malayalam", "mn": "mongolian", "mr": "marathi", "ms": "malay", "mt": "maltese",
+    "my": "myanmar", "ne": "nepali", "nl": "dutch", "nn": "nynorsk", "no": "norwegian",
+    "oc": "occitan", "pa": "punjabi", "pl": "polish", "ps": "pashto", "pt": "portuguese",
+    "ro": "romanian", "ru": "russian", "sa": "sanskrit", "sd": "sindhi", "si": "sinhala",
+    "sk": "slovak", "sl": "slovenian", "sn": "shona", "so": "somali", "sq": "albanian",
+    "sr": "serbian", "su": "sundanese", "sv": "swedish", "sw": "swahili", "ta": "tamil",
+    "te": "telugu", "tg": "tajik", "th": "thai", "tk": "turkmen", "tl": "tagalog",
+    "tr": "turkish", "tt": "tatar", "uk": "ukrainian", "ur": "urdu", "uz": "uzbek",
+    "vi": "vietnamese", "yi": "yiddish", "yo": "yoruba", "zh": "chinese"
+}
+# File extensions (lowercase, with leading dot) matched against the suffix of
+# an incoming file name. Video inputs go through extract_audio_from_video
+# before transcription; audio inputs are transcribed directly.
+SUPPORTED_VIDEO_FORMATS = ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.3gp']
+SUPPORTED_AUDIO_FORMATS = ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma']
+def extract_audio_from_video(video_path, output_path):
+    """Extract the audio track of a media file into a mono 16 kHz PCM file.
+
+    Args:
+        video_path: Path of the input media file handed to ffmpeg.
+        output_path: Destination path; an existing file is overwritten.
+
+    Returns:
+        True when ffmpeg finished without raising, False otherwise. Failures
+        are logged instead of raised so callers can branch on the result.
+    """
+    try:
+        (
+            ffmpeg
+            .input(video_path)
+            .output(output_path, acodec='pcm_s16le', ac=1, ar=16000)
+            .overwrite_output()
+            .run(quiet=True)
+        )
+        return True
+    except Exception as e:
+        # With quiet=True ffmpeg's stderr is captured on the raised error; it
+        # carries the actual failure reason, so prefer it over str(e).
+        stderr = getattr(e, "stderr", None)
+        if isinstance(stderr, bytes) and stderr:
+            detail = stderr.decode("utf-8", errors="replace").strip()
+        else:
+            detail = e
+        logger.error(f"Error extracting audio: {detail}")
+        return False
+def chunks_to_srt(chunks):
+    """Convert timestamped transcription chunks into SRT subtitle text.
+
+    Args:
+        chunks: Iterable of dicts with a ``'timestamp'`` pair ``(start, end)``
+            in seconds and a ``'text'`` string.
+
+    Returns:
+        The SRT document as one string; an empty string for no chunks.
+
+    A missing (``None``) end timestamp falls back to the chunk's start, and a
+    missing start falls back to the previous chunk's end, so an open-ended
+    final chunk no longer raises TypeError.
+    """
+    def _format_timestamp(seconds):
+        # Work in whole milliseconds so float error cannot truncate the
+        # fractional part (e.g. 2.3 s must render as ",300", not ",299").
+        total_ms = max(0, round(seconds * 1000))
+        hours, remainder = divmod(total_ms, 3600000)
+        minutes, remainder = divmod(remainder, 60000)
+        secs, millis = divmod(remainder, 1000)
+        return "{:02}:{:02}:{:02},{:03}".format(hours, minutes, secs, millis)
+    entries = []
+    previous_end = 0.0
+    for i, chunk in enumerate(chunks, 1):
+        start_time, end_time = chunk['timestamp']
+        if start_time is None:
+            start_time = previous_end
+        if end_time is None:
+            end_time = start_time
+        previous_end = end_time
+        entries.append(
+            f"{i}\n{_format_timestamp(start_time)} --> {_format_timestamp(end_time)}\n{chunk['text']}\n\n"
+        )
+    # Join once instead of growing a string inside the loop.
+    return "".join(entries)
+def download_youtube_audio(yt_url, output_path):
+    """Download the audio of a YouTube video after checking its length.
+
+    Args:
+        yt_url: URL of the video to fetch.
+        output_path: Output template passed to the downloader as ``outtmpl``.
+
+    Raises:
+        Exception: If metadata extraction fails, the video is longer than
+            YT_LENGTH_LIMIT_S seconds, or the download fails. The original
+            downloader error is chained as the cause.
+    """
+    try:
+        # Context manager so the metadata-only downloader is closed too.
+        with youtube_dl.YoutubeDL() as info_loader:
+            info = info_loader.extract_info(yt_url, download=False)
+    except youtube_dl.utils.DownloadError as err:
+        raise Exception(f"YouTube extraction error: {str(err)}") from err
+    if not info:
+        raise Exception("YouTube extraction error: no video information returned")
+    # Check video length. "duration" may be present but None, so coerce it to
+    # 0 instead of comparing None with an int.
+    file_length_s = info.get("duration") or 0
+    if file_length_s > YT_LENGTH_LIMIT_S:
+        yt_length_limit_hms = time.strftime("%H:%M:%S", time.gmtime(YT_LENGTH_LIMIT_S))
+        file_length_hms = time.strftime("%H:%M:%S", time.gmtime(file_length_s))
+        raise Exception(f"Video too long. Maximum: {yt_length_limit_hms}, got: {file_length_hms}")
+    # NOTE(review): "extractaudio"/"audioformat"/"audioquality" look like
+    # command-line option names; confirm the embedded YoutubeDL API honours
+    # them, otherwise a "postprocessors" entry is needed to get WAV output.
+    ydl_opts = {
+        "outtmpl": output_path,
+        "format": "bestaudio[ext=m4a]/bestaudio/best",
+        "extractaudio": True,
+        "audioformat": "wav",
+        "audioquality": "192K",
+    }
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        try:
+            ydl.download([yt_url])
+        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
+            # download() reports failures as DownloadError; ExtractorError is
+            # still caught for backward compatibility.
+            raise Exception(f"YouTube download error: {str(err)}") from err
+def process_audio_file(file_path, task="transcribe", language="auto", return_timestamps=False):
+    """Run the Whisper pipeline on one audio file and return its output.
+
+    Args:
+        file_path: Path of the audio file to decode and transcribe.
+        task: Value forwarded to generation as ``task``.
+        language: ``"auto"`` or a key of SUPPORTED_LANGUAGES; any other value
+            leaves the language unset.
+        return_timestamps: When truthy, also return chunks and SRT text.
+
+    Returns:
+        ``{"text": ...}``, plus ``"chunks"`` and ``"srt"`` when timestamps
+        were requested.
+
+    Raises:
+        Exception: Any failure, re-raised as "Audio processing error: ...".
+    """
+    try:
+        # Load the raw bytes, then decode them at the model's sampling rate.
+        with open(file_path, "rb") as audio_file:
+            raw_bytes = audio_file.read()
+        waveform = ffmpeg_read(raw_bytes, pipe.feature_extractor.sampling_rate)
+        model_input = {
+            "array": waveform,
+            "sampling_rate": pipe.feature_extractor.sampling_rate,
+        }
+        # Only pin the language for an explicit, known language code.
+        decode_options = {"task": task}
+        language_is_pinned = language != "auto" and language in SUPPORTED_LANGUAGES
+        if language_is_pinned:
+            decode_options["language"] = f"<|{language}|>"
+        output = pipe(
+            model_input,
+            batch_size=BATCH_SIZE,
+            generate_kwargs=decode_options,
+            return_timestamps=return_timestamps,
+        )
+        payload = {"text": output['text']}
+        if return_timestamps:
+            payload["chunks"] = output['chunks']
+            payload["srt"] = chunks_to_srt(output['chunks'])
+        return payload
+    except Exception as e:
+        logger.error(f"Error processing audio: {e}")
+        raise Exception(f"Audio processing error: {str(e)}")
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Report service status, model name, device and supported language codes."""
+    status_payload = {
+        "status": "healthy",
+        "model": MODEL_NAME,
+        "device": str(device),
+        "supported_languages": [code for code in SUPPORTED_LANGUAGES],
+    }
+    return jsonify(status_payload)
+@app.route('/languages', methods=['GET'])
+def get_supported_languages():
+    """Return the language code/name mapping together with its size."""
+    language_count = len(SUPPORTED_LANGUAGES)
+    return jsonify({
+        "supported_languages": SUPPORTED_LANGUAGES,
+        "total_count": language_count,
+    })
+@app.route('/transcribe', methods=['POST'])
+def transcribe_endpoint():
+    """Transcribe or translate one input given as a form upload or a URL.
+
+    Form fields:
+        task: 'transcribe' (default) or 'translate'.
+        language: 'auto' (default) or a key of SUPPORTED_LANGUAGES.
+        return_timestamps: 'true' to include chunks and SRT text.
+        file / youtube_url / audio_url: exactly one input source, checked in
+            that order.
+
+    Returns:
+        A JSON response. Success carries the process_audio_file result plus
+        the echoed parameters; validation and download problems return 400,
+        processing failures return 500.
+    """
+    try:
+        # Get parameters
+        task = request.form.get('task', 'transcribe')
+        language = request.form.get('language', 'auto')
+        return_timestamps = request.form.get('return_timestamps', 'false').lower() == 'true'
+        # Validate task
+        if task not in ['transcribe', 'translate']:
+            return jsonify({"error": "Task must be 'transcribe' or 'translate'"}), 400
+        # Validate language
+        if language != 'auto' and language not in SUPPORTED_LANGUAGES:
+            return jsonify({"error": f"Language '{language}' not supported"}), 400
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Handle different input types
+            if 'file' in request.files:
+                # File upload
+                upload = request.files['file']
+                if not upload.filename:
+                    return jsonify({"error": "No file selected"}), 400
+                # Check file size by seeking to the end of the upload stream
+                upload.seek(0, os.SEEK_END)
+                file_size = upload.tell()
+                upload.seek(0)
+                if file_size > MAX_FILE_SIZE:
+                    return jsonify({"error": f"File too large. Maximum size: {FILE_LIMIT_MB}MB"}), 400
+                # Save under a fixed name; only the extension comes from the client
+                file_extension = Path(upload.filename).suffix.lower()
+                temp_file_path = os.path.join(temp_dir, f"upload{file_extension}")
+                upload.save(temp_file_path)
+                # Process video files (extract audio)
+                if file_extension in SUPPORTED_VIDEO_FORMATS:
+                    audio_path = os.path.join(temp_dir, "extracted_audio.wav")
+                    if not extract_audio_from_video(temp_file_path, audio_path):
+                        return jsonify({"error": "Failed to extract audio from video"}), 500
+                    temp_file_path = audio_path
+                elif file_extension not in SUPPORTED_AUDIO_FORMATS:
+                    return jsonify({"error": f"Unsupported file format: {file_extension}"}), 400
+            elif 'youtube_url' in request.form:
+                # YouTube URL
+                youtube_url = request.form.get('youtube_url')
+                if not youtube_url:
+                    return jsonify({"error": "YouTube URL is required"}), 400
+                output_template = os.path.join(temp_dir, "youtube_audio.%(ext)s")
+                try:
+                    download_youtube_audio(youtube_url, output_template)
+                    # The downloader fills in the extension, so look the file up
+                    downloaded_name = next(
+                        (name for name in os.listdir(temp_dir) if name.startswith("youtube_audio")),
+                        None,
+                    )
+                except Exception as e:
+                    return jsonify({"error": str(e)}), 400
+                if downloaded_name is None:
+                    # Avoid handing the unexpanded template path to the decoder
+                    return jsonify({"error": "YouTube download produced no audio file"}), 500
+                temp_file_path = os.path.join(temp_dir, downloaded_name)
+            elif 'audio_url' in request.form:
+                # Direct audio/video URL
+                audio_url = request.form.get('audio_url')
+                if not audio_url:
+                    return jsonify({"error": "Audio URL is required"}), 400
+                import requests
+                from urllib.parse import urlparse
+                # NOTE(review): this fetches a caller-supplied URL from the
+                # server (SSRF risk). Only the scheme is restricted here;
+                # consider blocking private/internal addresses as well.
+                parsed_url = urlparse(audio_url)
+                if parsed_url.scheme not in ('http', 'https'):
+                    return jsonify({"error": "Audio URL must use http or https"}), 400
+                try:
+                    # `with` releases the streamed connection on every exit path
+                    with requests.get(audio_url, stream=True, timeout=30) as response:
+                        response.raise_for_status()
+                        # Use the URL path only, so a query string such as
+                        # "?token=..." never ends up in the file extension
+                        file_extension = Path(parsed_url.path).suffix.lower()
+                        if not file_extension:
+                            content_type = response.headers.get('content-type', '')
+                            if 'audio' in content_type:
+                                file_extension = '.mp3'
+                            elif 'video' in content_type:
+                                file_extension = '.mp4'
+                            else:
+                                file_extension = '.mp3'  # default
+                        temp_file_path = os.path.join(temp_dir, f"download{file_extension}")
+                        # Apply the same size cap as uploads while streaming
+                        bytes_written = 0
+                        with open(temp_file_path, 'wb') as f:
+                            for chunk in response.iter_content(chunk_size=8192):
+                                bytes_written += len(chunk)
+                                if bytes_written > MAX_FILE_SIZE:
+                                    return jsonify({"error": f"File too large. Maximum size: {FILE_LIMIT_MB}MB"}), 400
+                                f.write(chunk)
+                    # Process video files (extract audio)
+                    if file_extension in SUPPORTED_VIDEO_FORMATS:
+                        audio_path = os.path.join(temp_dir, "extracted_audio.wav")
+                        if not extract_audio_from_video(temp_file_path, audio_path):
+                            return jsonify({"error": "Failed to extract audio from video"}), 500
+                        temp_file_path = audio_path
+                except requests.RequestException as e:
+                    return jsonify({"error": f"Failed to download file: {str(e)}"}), 400
+            else:
+                return jsonify({"error": "No input provided. Use 'file', 'youtube_url', or 'audio_url'"}), 400
+            # Process the audio file while the temporary directory still exists
+            result = process_audio_file(temp_file_path, task, language, return_timestamps)
+            return jsonify({
+                "success": True,
+                "task": task,
+                "language": language,
+                "return_timestamps": return_timestamps,
+                **result
+            })
+    except Exception as e:
+        logger.error(f"Transcription error: {e}")
+        return jsonify({"error": str(e)}), 500
+@app.route('/batch_transcribe', methods=['POST'])
+def batch_transcribe_endpoint():
+    """Transcribe up to 10 uploaded files and report a result per file.
+
+    Form fields:
+        files: The uploaded files (repeated field).
+        task, language, return_timestamps: Same meaning and validation as on
+            the /transcribe endpoint.
+
+    Returns:
+        A JSON response with one entry per file under "results". A failing
+        file is reported with "success": False and does not abort the batch.
+        Request-level validation problems return 400.
+    """
+    try:
+        files = request.files.getlist('files')
+        task = request.form.get('task', 'transcribe')
+        language = request.form.get('language', 'auto')
+        return_timestamps = request.form.get('return_timestamps', 'false').lower() == 'true'
+        if not files:
+            return jsonify({"error": "No files provided"}), 400
+        if len(files) > 10:  # Limit batch size
+            return jsonify({"error": "Maximum 10 files per batch"}), 400
+        # Validate once up front, as /transcribe does, instead of failing
+        # every file separately on a bad parameter.
+        if task not in ['transcribe', 'translate']:
+            return jsonify({"error": "Task must be 'transcribe' or 'translate'"}), 400
+        if language != 'auto' and language not in SUPPORTED_LANGUAGES:
+            return jsonify({"error": f"Language '{language}' not supported"}), 400
+        results = []
+        for idx, upload in enumerate(files):
+            try:
+                if not upload.filename:
+                    raise ValueError("No file selected")
+                file_extension = Path(upload.filename).suffix.lower()
+                is_video = file_extension in SUPPORTED_VIDEO_FORMATS
+                if not is_video and file_extension not in SUPPORTED_AUDIO_FORMATS:
+                    raise ValueError(f"Unsupported file format: {file_extension}")
+                # Same per-file size cap as the single-file endpoint
+                upload.seek(0, os.SEEK_END)
+                file_size = upload.tell()
+                upload.seek(0)
+                if file_size > MAX_FILE_SIZE:
+                    raise ValueError(f"File too large. Maximum size: {FILE_LIMIT_MB}MB")
+                with tempfile.TemporaryDirectory() as temp_dir:
+                    # Save and process file
+                    temp_file_path = os.path.join(temp_dir, f"batch_{idx}{file_extension}")
+                    upload.save(temp_file_path)
+                    # Handle video files
+                    if is_video:
+                        audio_path = os.path.join(temp_dir, f"batch_{idx}_audio.wav")
+                        if not extract_audio_from_video(temp_file_path, audio_path):
+                            raise RuntimeError("Failed to extract audio from video")
+                        temp_file_path = audio_path
+                    # Process audio
+                    result = process_audio_file(temp_file_path, task, language, return_timestamps)
+                    results.append({
+                        "filename": upload.filename,
+                        "success": True,
+                        **result
+                    })
+            except Exception as e:
+                # Record the failure and continue with the remaining files
+                results.append({
+                    "filename": upload.filename,
+                    "success": False,
+                    "error": str(e)
+                })
+        return jsonify({
+            "success": True,
+            "batch_size": len(files),
+            "results": results
+        })
+    except Exception as e:
+        logger.error(f"Batch transcription error: {e}")
+        return jsonify({"error": str(e)}), 500
+# Extension hooks for future plugins
+class ExtensionManager:
+    """Registry of callbacks grouped under a fixed set of hook names."""
+    def __init__(self):
+        hook_names = (
+            'before_transcription',
+            'after_transcription',
+            'before_translation',
+            'after_translation',
+        )
+        # One independent callback list per hook name.
+        self.hooks = {hook_name: [] for hook_name in hook_names}
+    def register_hook(self, hook_name, callback):
+        """Append `callback` to a known hook; unknown hook names are ignored."""
+        if hook_name not in self.hooks:
+            return
+        self.hooks[hook_name].append(callback)
+    def run_hooks(self, hook_name, data):
+        """Pass `data` through each callback of `hook_name` and return the result.
+
+        A callback that raises is logged and skipped; the value it was given
+        is passed on unchanged to the next callback.
+        """
+        current = data
+        for hook in self.hooks.get(hook_name, []):
+            try:
+                current = hook(current)
+            except Exception as e:
+                logger.error(f"Hook error in {hook_name}: {e}")
+        return current
+# Global extension manager
+extension_manager = ExtensionManager()
+@app.route('/extensions/hooks', methods=['GET'])
+def get_extension_hooks():
+    """List the hook names known to the global extension manager."""
+    hook_names = [hook_name for hook_name in extension_manager.hooks]
+    return jsonify({
+        "available_hooks": hook_names,
+        "description": "Extension hooks for plugins like CSS customization, myCred integration, etc.",
+    })
+# When executed as a script, start Flask's built-in server on all network
+# interfaces, port 7860, with debug mode off.
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860, debug=False)