Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ import requests
|
|
| 12 |
import mimetypes
|
| 13 |
import urllib.parse
|
| 14 |
import subprocess
|
|
|
|
| 15 |
|
| 16 |
# Configure logging
|
| 17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -94,6 +95,15 @@ def transcribe_audio_chunks(chunks):
|
|
| 94 |
os.unlink(temp_audio_file.name)
|
| 95 |
return ' '.join(transcriptions)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
def download_file(url):
|
| 98 |
local_filename = url.split('/')[-1]
|
| 99 |
with requests.get(url, stream=True) as r:
|
|
@@ -126,10 +136,27 @@ def process_media(file_path, is_url=False):
|
|
| 126 |
temp_file = temp_file.name
|
| 127 |
logger.info(f"Uploaded file saved: {temp_file}")
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
# Convert to WAV using ffmpeg
|
| 130 |
wav_path = tempfile.NamedTemporaryFile(delete=False, suffix='.wav').name
|
| 131 |
try:
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
logger.info(f"Audio extracted to WAV: {wav_path}")
|
| 134 |
except subprocess.CalledProcessError as e:
|
| 135 |
logger.error(f"FFmpeg conversion failed: {str(e)}")
|
|
@@ -157,7 +184,7 @@ def process_media(file_path, is_url=False):
|
|
| 157 |
os.unlink(temp_file)
|
| 158 |
if wav_path and os.path.exists(wav_path):
|
| 159 |
os.unlink(wav_path)
|
| 160 |
-
|
| 161 |
@app.callback(
|
| 162 |
[Output('output-media-upload', 'children'),
|
| 163 |
Output('transcription-status', 'children'),
|
|
|
|
| 12 |
import mimetypes
|
| 13 |
import urllib.parse
|
| 14 |
import subprocess
|
| 15 |
+
import json
|
| 16 |
|
| 17 |
# Configure logging
|
| 18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 95 |
os.unlink(temp_audio_file.name)
|
| 96 |
return ' '.join(transcriptions)
|
| 97 |
|
| 98 |
+
def get_file_info(file_path):
    """Probe a media file with ffprobe and return its parsed JSON metadata.

    Runs ``ffprobe -print_format json -show_format -show_streams`` on
    *file_path* and decodes its standard output.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        The decoded ffprobe report as a dict, always containing a
        ``'streams'`` list (empty when ffprobe reported none), or ``None``
        when the probe could not be run or its output could not be decoded.
        Every failure is logged; none is raised to the caller.
    """
    # '-v error' (rather than 'quiet') keeps stdout pure JSON while letting
    # ffprobe explain failures on stderr, which is captured and logged below.
    command = [
        'ffprobe', '-v', 'error', '-print_format', 'json',
        '-show_format', '-show_streams', file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        detail = (e.stderr or '').strip()
        message = f"Error getting file info: {str(e)}"
        if detail:
            message = f"{message} ({detail})"
        logger.error(message)
        return None
    except OSError as e:
        # Raised when the ffprobe executable is missing or cannot be started.
        logger.error(f"Error getting file info: {str(e)}")
        return None

    try:
        info = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        logger.error(f"Error getting file info: invalid ffprobe output: {str(e)}")
        return None

    if not isinstance(info, dict):
        logger.error("Error getting file info: unexpected ffprobe output")
        return None

    # Callers index info['streams'] directly; guarantee the key exists.
    info.setdefault('streams', [])
    return info
|
| 106 |
+
|
| 107 |
def download_file(url):
|
| 108 |
local_filename = url.split('/')[-1]
|
| 109 |
with requests.get(url, stream=True) as r:
|
|
|
|
| 136 |
temp_file = temp_file.name
|
| 137 |
logger.info(f"Uploaded file saved: {temp_file}")
|
| 138 |
|
| 139 |
+
# Get file info
|
| 140 |
+
file_info = get_file_info(temp_file)
|
| 141 |
+
if not file_info:
|
| 142 |
+
return "Unable to process file: Could not determine file type", False
|
| 143 |
+
|
| 144 |
+
# Determine if it's audio or video
|
| 145 |
+
is_audio = any(stream['codec_type'] == 'audio' for stream in file_info['streams'])
|
| 146 |
+
is_video = any(stream['codec_type'] == 'video' for stream in file_info['streams'])
|
| 147 |
+
|
| 148 |
# Convert to WAV using ffmpeg
|
| 149 |
wav_path = tempfile.NamedTemporaryFile(delete=False, suffix='.wav').name
|
| 150 |
try:
|
| 151 |
+
if is_video:
|
| 152 |
+
# Extract audio from video
|
| 153 |
+
subprocess.run(['ffmpeg', '-i', temp_file, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2', wav_path], check=True)
|
| 154 |
+
elif is_audio:
|
| 155 |
+
# Convert audio to WAV
|
| 156 |
+
subprocess.run(['ffmpeg', '-i', temp_file, '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2', wav_path], check=True)
|
| 157 |
+
else:
|
| 158 |
+
return "Unsupported file type: Neither audio nor video detected", False
|
| 159 |
+
|
| 160 |
logger.info(f"Audio extracted to WAV: {wav_path}")
|
| 161 |
except subprocess.CalledProcessError as e:
|
| 162 |
logger.error(f"FFmpeg conversion failed: {str(e)}")
|
|
|
|
| 184 |
os.unlink(temp_file)
|
| 185 |
if wav_path and os.path.exists(wav_path):
|
| 186 |
os.unlink(wav_path)
|
| 187 |
+
|
| 188 |
@app.callback(
|
| 189 |
[Output('output-media-upload', 'children'),
|
| 190 |
Output('transcription-status', 'children'),
|