Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Mon Dec 9 16:43:31 2024 | |
| @author: Pradeep Kumar | |
| """ | |
| import whisper | |
| import torch | |
| import os | |
| from flask import Flask, request, abort, jsonify, render_template | |
| from deep_translator import GoogleTranslator | |
| #%% | |
| import subprocess | |
# Report the installed version of each dependency this app relies on.
#
# Versions are read from the running interpreter's own package metadata
# instead of shelling out to ``pip show``: a ``pip`` found on PATH may belong
# to a different Python environment, and spawning one subprocess per package
# at import time is slow.
from importlib import metadata

# Names to report on (these are the keys of ``package_versions``).
# NOTE: "os" is part of the standard library, so it has no distribution
# metadata and is always reported as "Not Installed".
packages = ["whisper", "torch", "os", "flask", "deep-translator"]

# Report keys whose PyPI distribution name differs from the key itself.
# The ``whisper`` module used by this app is published as "openai-whisper".
_DISTRIBUTION_NAMES = {"whisper": "openai-whisper"}

# Maps each entry of ``packages`` to its version string, "Not Installed",
# or "Error: <details>" if the metadata lookup itself failed.
package_versions = {}
for package in packages:
    distribution = _DISTRIBUTION_NAMES.get(package, package)
    try:
        package_versions[package] = metadata.version(distribution)
    except metadata.PackageNotFoundError:
        package_versions[package] = "Not Installed"
    except Exception as e:  # e.g. corrupt metadata; keep the report best-effort
        package_versions[package] = f"Error: {e}"

# Bare expression so the dict is displayed when this cell is run interactively
# (the file uses ``#%%`` cell markers); it has no effect when run as a script.
package_versions
| #%% | |
# Run Whisper on the GPU when PyTorch reports a usable CUDA device,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# The Flask application object for this module.
app = Flask(__name__)

# Transcripts are written to a "transcripts" folder under the directory the
# process was started from (the current working directory).
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
| # Ensure transcripts directory exists | |
def check_directory(path):
    """Create the directory *path* (and any missing parents) if needed.

    Uses ``exist_ok=True`` rather than a separate existence check, so the
    call is safe to repeat and cannot fail when another process creates the
    directory between the check and the creation.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If *path* exists but is not a directory, or the directory
            cannot be created.
    """
    os.makedirs(path, exist_ok=True)
| check_directory(TRANSCRIPTS_FOLDER) | |
@app.route('/')
def upload_page():
    """Render the upload page for audio file submission.

    The view is registered on the application root so that the page is
    actually reachable; without a route registration Flask never calls it.

    NOTE(review): the '/' path is an assumption -- confirm it matches the
    URL the deployment expects for the upload form.

    Returns:
        The rendered ``upload.html`` template.
    """
    return render_template('upload.html')
@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Transcribe an uploaded audio file with Whisper and save the transcript.

    Reads from the current request:
        * file ``audio_file`` -- the audio to transcribe (required)
        * form ``language``   -- language code passed to Whisper (required)
        * form ``model_type`` -- Whisper model name, defaults to ``"base"``

    One ``[start - end] text`` line is written per segment to
    ``<TRANSCRIPTS_FOLDER>/<upload name>_transcript.txt``. When the language
    is ``'nl'`` each segment is followed by its English translation.

    NOTE(review): the '/process_audio' path is an assumption -- confirm it
    matches the form action used by ``upload.html``.

    Returns:
        A JSON response with ``message``, ``transcript_path`` and
        ``transcription_preview`` on success, or ``({"error": ...}, 500)``
        when loading the model or processing the audio fails.

    Raises:
        werkzeug.exceptions.HTTPException: 400 for a missing or unnamed file,
            an unusable file name, a missing language, or an unknown model.
    """
    import tempfile

    if 'audio_file' not in request.files:
        abort(400, "No file part in the request.")

    audio_file = request.files['audio_file']
    selected_language = request.form.get('language', None)
    model_type = request.form.get('model_type', "base")

    if not audio_file or audio_file.filename == '':
        abort(400, "No file selected for upload.")

    # Validate everything up front: these checks must run outside the
    # try/except below (abort() raises an exception that a broad
    # ``except Exception`` would turn into a 500) and before any expensive
    # work such as loading the model.
    if not selected_language:
        abort(400, "Language selection is required.")

    # load_model() also accepts a path to a checkpoint file, so restrict the
    # client-supplied value to the official model names.
    if model_type not in whisper.available_models():
        abort(400, f"Unknown Whisper model: {model_type}")

    # The file name comes from the client; keep only its final component so
    # it cannot point outside the transcripts folder.
    safe_name = os.path.basename(audio_file.filename.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        abort(400, "Invalid file name.")

    # Store the upload under a unique name in the system temp directory rather
    # than BASE_DIR/<client name>: that avoids overwriting (and then deleting)
    # application files and collisions between concurrent requests. The
    # original extension is kept as a hint for the audio decoder.
    fd, temp_audio_path = tempfile.mkstemp(suffix=os.path.splitext(safe_name)[1])
    os.close(fd)

    try:
        audio_file.save(temp_audio_path)

        try:
            # Load the Whisper model based on user selection
            model = whisper.load_model(model_type, device=DEVICE)
        except Exception as e:
            return jsonify({"error": f"Failed to load Whisper model ({model_type}): {e}"}), 500

        try:
            # Transcribe with the user-selected language
            result = model.transcribe(
                temp_audio_path, fp16=False, language=selected_language, verbose=False
            )

            # Build the translator once instead of once per segment.
            translator = None
            if selected_language == 'nl':
                translator = GoogleTranslator(source='auto', target='en')

            # Save the transcription with timestamps
            transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{safe_name}_transcript.txt")
            with open(transcript_file, 'w', encoding='utf-8') as text_file:
                for segment in result['segments']:
                    start_time = segment['start']
                    end_time = segment['end']
                    text = segment['text']
                    text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
                    if translator is not None:
                        text_en = translator.translate(text)
                        text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

            # Return the transcription metadata
            return jsonify({
                "message": "Transcription successful!",
                "transcript_path": transcript_file,
                "transcription_preview": result['text']
            })
        except Exception as e:
            return jsonify({"error": f"Failed to process the audio file: {e}"}), 500
    finally:
        # Clean up the temporary audio file on every exit path, including a
        # failed model load.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
if __name__ == '__main__':
    # Run the Flask development server.
    #
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute arbitrary code. It is therefore opt-in
    # through the FLASK_DEBUG environment variable instead of always on.
    debug_flag = os.environ.get("FLASK_DEBUG", "")
    debug_enabled = bool(debug_flag) and debug_flag.lower() not in ("0", "false", "no")
    app.run(debug=debug_enabled)