Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ from pydub import AudioSegment
|
|
| 11 |
import requests
|
| 12 |
import mimetypes
|
| 13 |
import urllib.parse
|
|
|
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
| 18 |
|
| 19 |
# Try to import moviepy with the simpler import statement
|
| 20 |
try:
|
| 21 |
-
from moviepy import VideoFileClip
|
| 22 |
logger.info("MoviePy (VideoFileClip) successfully imported")
|
| 23 |
except ImportError as e:
|
| 24 |
logger.error(f"Error importing MoviePy (VideoFileClip): {str(e)}")
|
|
@@ -92,6 +93,15 @@ def transcribe_audio_chunks(chunks):
|
|
| 92 |
transcriptions.append(transcript.get('text', ''))
|
| 93 |
return ' '.join(transcriptions)
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def process_media(file_path, is_url=False):
|
| 96 |
global generated_file, transcription_text
|
| 97 |
temp_file = None
|
|
@@ -100,43 +110,27 @@ def process_media(file_path, is_url=False):
|
|
| 100 |
if is_url:
|
| 101 |
logger.info(f"Processing URL: {file_path}")
|
| 102 |
try:
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
content_type = response.headers.get('content-type', '')
|
| 106 |
-
extension = mimetypes.guess_extension(content_type) or ''
|
| 107 |
-
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
|
| 108 |
-
for chunk in response.iter_content(chunk_size=8192):
|
| 109 |
-
temp_file.write(chunk)
|
| 110 |
-
temp_file.close()
|
| 111 |
-
logger.info(f"URL content downloaded: {temp_file.name}")
|
| 112 |
except Exception as e:
|
| 113 |
logger.error(f"Error downloading URL content: {str(e)}")
|
| 114 |
return f"Error downloading URL content: {str(e)}", False
|
| 115 |
else:
|
| 116 |
logger.info("Processing uploaded file")
|
| 117 |
-
temp_file = tempfile.NamedTemporaryFile(delete=False)
|
| 118 |
temp_file.write(file_path)
|
| 119 |
temp_file.close()
|
| 120 |
-
|
|
|
|
| 121 |
|
| 122 |
-
#
|
|
|
|
| 123 |
try:
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
logger.warning(f"Could not process with moviepy: {str(e)}")
|
| 130 |
-
# If moviepy fails, try with pydub
|
| 131 |
-
try:
|
| 132 |
-
audio = AudioSegment.from_file(temp_file.name)
|
| 133 |
-
wav_path = temp_file.name + ".wav"
|
| 134 |
-
audio.export(wav_path, format="wav")
|
| 135 |
-
except Exception as e:
|
| 136 |
-
logger.error(f"Could not process audio: {str(e)}")
|
| 137 |
-
return f"Could not process audio: {str(e)}", False
|
| 138 |
-
|
| 139 |
-
logger.info(f"Audio extracted to WAV: {wav_path}")
|
| 140 |
|
| 141 |
# Chunk the audio file
|
| 142 |
audio = AudioSegment.from_wav(wav_path)
|
|
@@ -156,8 +150,8 @@ def process_media(file_path, is_url=False):
|
|
| 156 |
logger.error(f"Error during processing: {str(e)}")
|
| 157 |
return f"An error occurred: {str(e)}", False
|
| 158 |
finally:
|
| 159 |
-
if temp_file and os.path.exists(temp_file
|
| 160 |
-
os.unlink(temp_file
|
| 161 |
if wav_path and os.path.exists(wav_path):
|
| 162 |
os.unlink(wav_path)
|
| 163 |
|
|
|
|
| 11 |
import requests
|
| 12 |
import mimetypes
|
| 13 |
import urllib.parse
|
| 14 |
+
import subprocess
|
| 15 |
|
| 16 |
# Configure logging
|
| 17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 19 |
|
| 20 |
# Try to import moviepy with the simpler import statement
|
| 21 |
try:
|
| 22 |
+
from moviepy import VideoFileClip, AudioFileClip
|
| 23 |
logger.info("MoviePy (VideoFileClip) successfully imported")
|
| 24 |
except ImportError as e:
|
| 25 |
logger.error(f"Error importing MoviePy (VideoFileClip): {str(e)}")
|
|
|
|
| 93 |
transcriptions.append(transcript.get('text', ''))
|
| 94 |
return ' '.join(transcriptions)
|
| 95 |
|
| 96 |
+
def download_file(url, timeout=(10, 60)):
    """Stream the resource at *url* into a new temporary file and return its path.

    The file is created with ``tempfile.mkstemp`` instead of being named
    after the last URL segment in the current working directory. That
    avoids overwriting (and, once the caller cleans up, deleting) an
    unrelated local file that happens to share the remote name, and it
    also works for URLs that end in ``/`` or carry a query string.

    Args:
        url: Address of the media file to download.
        timeout: Value passed to ``requests.get(timeout=...)``; a
            ``(connect, read)`` tuple in seconds. The read timeout bounds
            the wait for each chunk, not the whole transfer.

    Returns:
        Path of the downloaded file as a string. The caller owns the file
        and is responsible for removing it.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx response (via ``raise_for_status``).
        OSError: If the temporary file cannot be created or written.
    """
    # Take the extension from the URL *path* only, so a query string or
    # fragment ("clip.mp4?token=abc") never ends up in the file name.
    url_path = urllib.parse.urlparse(url).path
    suffix = os.path.splitext(os.path.basename(url_path))[1]

    fd, local_filename = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, 'wb') as f, \
                requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    except Exception:
        # Do not leave a partial download behind; the caller never learns
        # the path when we raise, so it could not clean it up itself.
        try:
            os.unlink(local_filename)
        except OSError:
            pass
        raise
    return local_filename
|
| 104 |
+
|
| 105 |
def process_media(file_path, is_url=False):
|
| 106 |
global generated_file, transcription_text
|
| 107 |
temp_file = None
|
|
|
|
| 110 |
if is_url:
|
| 111 |
logger.info(f"Processing URL: {file_path}")
|
| 112 |
try:
|
| 113 |
+
temp_file = download_file(file_path)
|
| 114 |
+
logger.info(f"URL content downloaded: {temp_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
except Exception as e:
|
| 116 |
logger.error(f"Error downloading URL content: {str(e)}")
|
| 117 |
return f"Error downloading URL content: {str(e)}", False
|
| 118 |
else:
|
| 119 |
logger.info("Processing uploaded file")
|
| 120 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
|
| 121 |
temp_file.write(file_path)
|
| 122 |
temp_file.close()
|
| 123 |
+
temp_file = temp_file.name
|
| 124 |
+
logger.info(f"Uploaded file saved: {temp_file}")
|
| 125 |
|
| 126 |
+
# Convert to WAV using ffmpeg
|
| 127 |
+
wav_path = temp_file + ".wav"
|
| 128 |
try:
|
| 129 |
+
subprocess.run(['ffmpeg', '-i', temp_file, '-acodec', 'pcm_s16le', '-ar', '44100', wav_path], check=True)
|
| 130 |
+
logger.info(f"Audio extracted to WAV: {wav_path}")
|
| 131 |
+
except subprocess.CalledProcessError as e:
|
| 132 |
+
logger.error(f"FFmpeg conversion failed: {str(e)}")
|
| 133 |
+
return f"FFmpeg conversion failed: {str(e)}", False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
# Chunk the audio file
|
| 136 |
audio = AudioSegment.from_wav(wav_path)
|
|
|
|
| 150 |
logger.error(f"Error during processing: {str(e)}")
|
| 151 |
return f"An error occurred: {str(e)}", False
|
| 152 |
finally:
|
| 153 |
+
if temp_file and os.path.exists(temp_file):
|
| 154 |
+
os.unlink(temp_file)
|
| 155 |
if wav_path and os.path.exists(wav_path):
|
| 156 |
os.unlink(wav_path)
|
| 157 |
|