Spaces:
Running
Running
samir72
committed on
Commit
·
ec7331c
1
Parent(s):
9865a29
Error handling
Browse files
Youtubetranscription_summarizer.py
CHANGED
|
@@ -25,14 +25,17 @@ def nslookup(domain):
|
|
| 25 |
# Perform DNS lookup for the domain
|
| 26 |
addresses = socket.getaddrinfo(domain, None)
|
| 27 |
print(f"DNS lookup succesfull for {domain}:")
|
|
|
|
| 28 |
# for addr in addresses:
|
| 29 |
# # Extract IP address from the result
|
| 30 |
# ip = addr[4][0]
|
| 31 |
# print(f"IP Address: {ip}")
|
| 32 |
except socket.gaierror as e:
|
| 33 |
print(f"DNS lookup failed for {domain}: {e}")
|
|
|
|
| 34 |
except Exception as e:
|
| 35 |
print(f"An unexpected error occurred: {e}")
|
|
|
|
| 36 |
|
| 37 |
def extract_domain(url):
|
| 38 |
# Regular expression to match the domain name
|
|
@@ -146,12 +149,14 @@ def download_youtube_audio_wav16k_api(
|
|
| 146 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 147 |
ydl.extract_info(youtube_url, download=True)
|
| 148 |
except Exception as e:
|
| 149 |
-
raise YTDLPError(f"yt-dlp API failed: {e}") from e
|
|
|
|
| 150 |
|
| 151 |
# Locate the produced WAV (pre-downsampled)
|
| 152 |
pre_wavs = list(work_dir.glob("*.wav"))
|
| 153 |
if not pre_wavs:
|
| 154 |
-
raise YTDLPError("yt-dlp completed but no WAV was found.")
|
|
|
|
| 155 |
pre_wav = max(pre_wavs, key=lambda p: p.stat().st_mtime)
|
| 156 |
|
| 157 |
# Second stage: force 16 kHz mono via ffmpeg
|
|
@@ -171,7 +176,8 @@ def download_youtube_audio_wav16k_api(
|
|
| 171 |
text=True,
|
| 172 |
)
|
| 173 |
except subprocess.CalledProcessError as e:
|
| 174 |
-
raise YTDLPError(f"ffmpeg failed to resample: {e.stderr or e.stdout}") from e
|
|
|
|
| 175 |
|
| 176 |
# Clean up intermediates if desired
|
| 177 |
if not keep_intermediate:
|
|
|
|
| 25 |
# Perform DNS lookup for the domain
|
| 26 |
addresses = socket.getaddrinfo(domain, None)
|
| 27 |
print(f"DNS lookup succesfull for {domain}:")
|
| 28 |
+
return True
|
| 29 |
# for addr in addresses:
|
| 30 |
# # Extract IP address from the result
|
| 31 |
# ip = addr[4][0]
|
| 32 |
# print(f"IP Address: {ip}")
|
| 33 |
except socket.gaierror as e:
|
| 34 |
print(f"DNS lookup failed for {domain}: {e}")
|
| 35 |
+
return False
|
| 36 |
except Exception as e:
|
| 37 |
print(f"An unexpected error occurred: {e}")
|
| 38 |
+
return False
|
| 39 |
|
| 40 |
def extract_domain(url):
|
| 41 |
# Regular expression to match the domain name
|
|
|
|
| 149 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 150 |
ydl.extract_info(youtube_url, download=True)
|
| 151 |
except Exception as e:
|
| 152 |
+
#raise YTDLPError(f"yt-dlp API failed: {e}") from e
|
| 153 |
+
return f"yt-dlp API failed: {e}"
|
| 154 |
|
| 155 |
# Locate the produced WAV (pre-downsampled)
|
| 156 |
pre_wavs = list(work_dir.glob("*.wav"))
|
| 157 |
if not pre_wavs:
|
| 158 |
+
#raise YTDLPError("yt-dlp completed but no WAV was found.")
|
| 159 |
+
return "yt-dlp completed but no WAV was found."
|
| 160 |
pre_wav = max(pre_wavs, key=lambda p: p.stat().st_mtime)
|
| 161 |
|
| 162 |
# Second stage: force 16 kHz mono via ffmpeg
|
|
|
|
| 176 |
text=True,
|
| 177 |
)
|
| 178 |
except subprocess.CalledProcessError as e:
|
| 179 |
+
#raise YTDLPError(f"ffmpeg failed to resample: {e.stderr or e.stdout}") from e
|
| 180 |
+
return f"ffmpeg failed to resample: {e.stderr or e.stdout}"
|
| 181 |
|
| 182 |
# Clean up intermediates if desired
|
| 183 |
if not keep_intermediate:
|
__pycache__/Youtubetranscription_summarizer.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/Youtubetranscription_summarizer.cpython-313.pyc and b/__pycache__/Youtubetranscription_summarizer.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -13,7 +13,7 @@ import re
|
|
| 13 |
|
| 14 |
# --- LLM call (Azure OpenAI with API key) -----------------------------------
|
| 15 |
|
| 16 |
-
def summarize_input(audio_b64: str = None, text_input: str = None, sys_prompt: str = None, user_prompt: str = None) -> str:
|
| 17 |
"""
|
| 18 |
Calls Azure OpenAI Chat Completions with audio input (base64 mp3) or text input, or both.
|
| 19 |
"""
|
|
@@ -82,7 +82,9 @@ def summarize_input(audio_b64: str = None, text_input: str = None, sys_prompt: s
|
|
| 82 |
{"role": "user", "content": content},
|
| 83 |
],
|
| 84 |
)
|
| 85 |
-
|
|
|
|
|
|
|
| 86 |
f"audio_size={len(audio_b64 or '')}, text_input_size={len(json_text or '')}")
|
| 87 |
return response.choices[0].message.content
|
| 88 |
|
|
@@ -132,7 +134,11 @@ def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
|
|
| 132 |
tmp_to_cleanup = []
|
| 133 |
audio_b64 = None
|
| 134 |
text_input = None
|
|
|
|
| 135 |
try:
|
|
|
|
|
|
|
|
|
|
| 136 |
audio_path = None
|
| 137 |
if upload_path:
|
| 138 |
audio_path = upload_path
|
|
@@ -142,23 +148,29 @@ def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
|
|
| 142 |
# Check dns resolution of the url domain
|
| 143 |
domain = Youtubetranscription_summarizer.extract_domain(url)
|
| 144 |
if domain:
|
| 145 |
-
Youtubetranscription_summarizer.nslookup(domain) # Check DNS resolution of the domain
|
| 146 |
-
|
| 147 |
-
|
| 148 |
|
| 149 |
-
if
|
| 150 |
-
#
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
if not audio_path and text_input is None:
|
| 157 |
return "Please provide content via upload, recording, or URL."
|
| 158 |
# If we have an audio file, encode it
|
| 159 |
if audio_path:
|
| 160 |
audio_b64 = encode_audio_from_path(audio_path)
|
| 161 |
-
return summarize_input(audio_b64, text_input, sys_prompt, user_prompt)
|
| 162 |
|
| 163 |
except Exception as e:
|
| 164 |
return print(f"Error processing audio at {datetime.now()}: prompt_length={len(user_prompt)}, audio_path={audio_path}: {str(e)}")
|
|
|
|
| 13 |
|
| 14 |
# --- LLM call (Azure OpenAI with API key) -----------------------------------
|
| 15 |
|
| 16 |
+
def summarize_input(audio_b64: str = None, text_input: str = None, sys_prompt: str = None, user_prompt: str = None, Starttime: datetime = None) -> str:
|
| 17 |
"""
|
| 18 |
Calls Azure OpenAI Chat Completions with audio input (base64 mp3) or text input, or both.
|
| 19 |
"""
|
|
|
|
| 82 |
{"role": "user", "content": content},
|
| 83 |
],
|
| 84 |
)
|
| 85 |
+
Enddate = datetime.now()
|
| 86 |
+
Callduration = Enddate - Starttime[0]
|
| 87 |
+
print(f"Azure API call with a duration of {Callduration}: prompt_length={len(user_prompt or '')}, "
|
| 88 |
f"audio_size={len(audio_b64 or '')}, text_input_size={len(json_text or '')}")
|
| 89 |
return response.choices[0].message.content
|
| 90 |
|
|
|
|
| 134 |
tmp_to_cleanup = []
|
| 135 |
audio_b64 = None
|
| 136 |
text_input = None
|
| 137 |
+
domaincheck = None
|
| 138 |
try:
|
| 139 |
+
# Capture start time for logging
|
| 140 |
+
Starttime = datetime.now(),
|
| 141 |
+
print(f"Azure API call starts at {datetime.now()}"),
|
| 142 |
audio_path = None
|
| 143 |
if upload_path:
|
| 144 |
audio_path = upload_path
|
|
|
|
| 148 |
# Check dns resolution of the url domain
|
| 149 |
domain = Youtubetranscription_summarizer.extract_domain(url)
|
| 150 |
if domain:
|
| 151 |
+
domaincheck = Youtubetranscription_summarizer.nslookup(domain) # Check DNS resolution of the domain
|
| 152 |
+
else:
|
| 153 |
+
return "Invalid URL format."
|
| 154 |
|
| 155 |
+
if domaincheck:
|
| 156 |
+
# Check if the url is a youtube link
|
| 157 |
+
CheckURL = re.search(r"Youtube", url, re.IGNORECASE)
|
| 158 |
+
|
| 159 |
+
if CheckURL:
|
| 160 |
+
# Get the transcription from youtube
|
| 161 |
+
text_input = Youtubetranscription_summarizer.main(url.strip()) # Youtube files are transcribed and summarized
|
| 162 |
+
tmp_to_cleanup.append(text_input)
|
| 163 |
+
else:
|
| 164 |
+
audio_path = download_to_temp_mp3(url.strip())
|
| 165 |
+
tmp_to_cleanup.append(audio_path)
|
| 166 |
+
else:
|
| 167 |
+
return f"DNS lookup failed for {domain}"
|
| 168 |
if not audio_path and text_input is None:
|
| 169 |
return "Please provide content via upload, recording, or URL."
|
| 170 |
# If we have an audio file, encode it
|
| 171 |
if audio_path:
|
| 172 |
audio_b64 = encode_audio_from_path(audio_path)
|
| 173 |
+
return summarize_input(audio_b64, text_input, sys_prompt, user_prompt, Starttime)
|
| 174 |
|
| 175 |
except Exception as e:
|
| 176 |
return print(f"Error processing audio at {datetime.now()}: prompt_length={len(user_prompt)}, audio_path={audio_path}: {str(e)}")
|