Update app.py
Browse files
app.py
CHANGED
|
@@ -15,21 +15,28 @@ import numpy as np
|
|
| 15 |
# Speech-recognition pipeline built with the "facebook/wav2vec2-base-960h"
# model. NOTE(review): presumably created once at import time and used by
# transcribe_audio -- its use is not visible in this hunk; confirm.
transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 16 |
|
| 17 |
def download_audio_from_url(url):
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
else:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def transcribe_audio(audio_bytes):
|
| 35 |
audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
|
|
@@ -77,10 +84,21 @@ def transcribe_audio(audio_bytes):
|
|
| 77 |
return formatted_transcript
|
| 78 |
|
| 79 |
def transcribe_video(url):
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def download_transcript(transcript):
    """Pair *transcript* with the fixed file name ``"transcript.txt"``.

    Args:
        transcript: The transcript value to hand back unchanged.

    Returns:
        A ``(transcript, "transcript.txt")`` tuple.
    """
    # NOTE(review): presumably consumed by a UI download component; the
    # caller is not visible here -- confirm before changing the tuple shape.
    return transcript, "transcript.txt"
|
| 86 |
|
|
|
|
| 15 |
# Speech-recognition pipeline built with the "facebook/wav2vec2-base-960h"
# model. NOTE(review): presumably created once at import time and used by
# transcribe_audio -- its use is not visible in this hunk; confirm.
transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 16 |
|
| 17 |
def download_audio_from_url(url):
    """Download the raw media bytes referenced by *url*.

    If *url* contains the substring ``"share"`` it is treated as a shareable
    page: the page is fetched, parsed as HTML, and the ``src`` attribute of
    its first ``<video>`` tag becomes the download URL. Any other *url* is
    downloaded directly.

    Args:
        url: Direct media URL, or the URL of a shareable page.

    Returns:
        The body of the media response as ``bytes``.

    Raises:
        ValueError: If a shareable page has no ``<video>`` tag with ``src``.
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (4xx/5xx) from either request.
    """
    from urllib.parse import urljoin

    # Without a timeout requests waits forever on a stalled server. The value
    # bounds the connect phase and each read, not the whole download.
    request_timeout = 30

    try:
        if "share" in url:
            print("Processing shareable link...")
            response = requests.get(url, timeout=request_timeout)
            # Fail here rather than parsing an error page as if it were the
            # share page.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            video_tag = soup.find('video')
            if video_tag and 'src' in video_tag.attrs:
                # ``src`` may be relative; resolve it against the final page
                # URL (after redirects). Absolute URLs pass through unchanged.
                video_url = urljoin(response.url, video_tag['src'])
                print(f"Extracted video URL: {video_url}")
            else:
                raise ValueError("Direct video URL not found in the shareable link.")
        else:
            video_url = url

        print(f"Downloading video from URL: {video_url}")
        response = requests.get(video_url, timeout=request_timeout)
        # Otherwise the bytes of a 404/500 page would be returned as media.
        response.raise_for_status()
        audio_bytes = response.content
        print(f"Successfully downloaded {len(audio_bytes)} bytes of data")
        return audio_bytes
    except Exception as e:
        # Log for the console, then let the caller decide how to report it.
        print(f"Error in download_audio_from_url: {str(e)}")
        raise
|
| 40 |
|
| 41 |
def transcribe_audio(audio_bytes):
|
| 42 |
audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
|
|
|
|
| 84 |
return formatted_transcript
|
| 85 |
|
| 86 |
def transcribe_video(url):
    """Download the media at *url* and return its transcript.

    Failures are not propagated: any exception raised while validating,
    downloading or transcribing is turned into a message string and returned
    in place of the transcript.

    Args:
        url: Media URL or shareable link. Surrounding whitespace is ignored.

    Returns:
        The value returned by ``transcribe_audio`` on success, otherwise a
        string of the form ``"An error occurred: <details>"``.
    """
    try:
        # Reject missing or blank input up front so the user gets a clear
        # message instead of an opaque error from the HTTP layer.
        if not url or not url.strip():
            raise ValueError("No URL provided.")
        url = url.strip()

        print(f"Attempting to download audio from URL: {url}")
        audio_bytes = download_audio_from_url(url)
        print(f"Successfully downloaded {len(audio_bytes)} bytes of audio data")

        print("Starting audio transcription...")
        transcript = transcribe_audio(audio_bytes)
        print("Transcription completed successfully")

        return transcript
    except Exception as e:
        # Broad catch is deliberate: this function reports errors as text
        # rather than raising.
        error_message = f"An error occurred: {e}"
        print(error_message)
        return error_message
|
| 101 |
+
|
| 102 |
def download_transcript(transcript):
    """Pair *transcript* with the fixed file name ``"transcript.txt"``.

    Args:
        transcript: The transcript value to hand back unchanged.

    Returns:
        A ``(transcript, "transcript.txt")`` tuple.
    """
    # NOTE(review): presumably consumed by a UI download component; the
    # caller is not visible here -- confirm before changing the tuple shape.
    return transcript, "transcript.txt"
|
| 104 |
|