samir72 committed on
Commit
ec7331c
·
1 Parent(s): 9865a29

Error handling

Browse files
Youtubetranscription_summarizer.py CHANGED
@@ -25,14 +25,17 @@ def nslookup(domain):
25
  # Perform DNS lookup for the domain
26
  addresses = socket.getaddrinfo(domain, None)
27
  print(f"DNS lookup succesfull for {domain}:")
 
28
  # for addr in addresses:
29
  # # Extract IP address from the result
30
  # ip = addr[4][0]
31
  # print(f"IP Address: {ip}")
32
  except socket.gaierror as e:
33
  print(f"DNS lookup failed for {domain}: {e}")
 
34
  except Exception as e:
35
  print(f"An unexpected error occurred: {e}")
 
36
 
37
  def extract_domain(url):
38
  # Regular expression to match the domain name
@@ -146,12 +149,14 @@ def download_youtube_audio_wav16k_api(
146
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
147
  ydl.extract_info(youtube_url, download=True)
148
  except Exception as e:
149
- raise YTDLPError(f"yt-dlp API failed: {e}") from e
 
150
 
151
  # Locate the produced WAV (pre-downsampled)
152
  pre_wavs = list(work_dir.glob("*.wav"))
153
  if not pre_wavs:
154
- raise YTDLPError("yt-dlp completed but no WAV was found.")
 
155
  pre_wav = max(pre_wavs, key=lambda p: p.stat().st_mtime)
156
 
157
  # Second stage: force 16 kHz mono via ffmpeg
@@ -171,7 +176,8 @@ def download_youtube_audio_wav16k_api(
171
  text=True,
172
  )
173
  except subprocess.CalledProcessError as e:
174
- raise YTDLPError(f"ffmpeg failed to resample: {e.stderr or e.stdout}") from e
 
175
 
176
  # Clean up intermediates if desired
177
  if not keep_intermediate:
 
25
  # Perform DNS lookup for the domain
26
  addresses = socket.getaddrinfo(domain, None)
27
  print(f"DNS lookup succesfull for {domain}:")
28
+ return True
29
  # for addr in addresses:
30
  # # Extract IP address from the result
31
  # ip = addr[4][0]
32
  # print(f"IP Address: {ip}")
33
  except socket.gaierror as e:
34
  print(f"DNS lookup failed for {domain}: {e}")
35
+ return False
36
  except Exception as e:
37
  print(f"An unexpected error occurred: {e}")
38
+ return False
39
 
40
  def extract_domain(url):
41
  # Regular expression to match the domain name
 
149
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
150
  ydl.extract_info(youtube_url, download=True)
151
  except Exception as e:
152
+ #raise YTDLPError(f"yt-dlp API failed: {e}") from e
153
+ return f"yt-dlp API failed: {e}"
154
 
155
  # Locate the produced WAV (pre-downsampled)
156
  pre_wavs = list(work_dir.glob("*.wav"))
157
  if not pre_wavs:
158
+ #raise YTDLPError("yt-dlp completed but no WAV was found.")
159
+ return "yt-dlp completed but no WAV was found."
160
  pre_wav = max(pre_wavs, key=lambda p: p.stat().st_mtime)
161
 
162
  # Second stage: force 16 kHz mono via ffmpeg
 
176
  text=True,
177
  )
178
  except subprocess.CalledProcessError as e:
179
+ #raise YTDLPError(f"ffmpeg failed to resample: {e.stderr or e.stdout}") from e
180
+ return f"ffmpeg failed to resample: {e.stderr or e.stdout}"
181
 
182
  # Clean up intermediates if desired
183
  if not keep_intermediate:
__pycache__/Youtubetranscription_summarizer.cpython-313.pyc CHANGED
Binary files a/__pycache__/Youtubetranscription_summarizer.cpython-313.pyc and b/__pycache__/Youtubetranscription_summarizer.cpython-313.pyc differ
 
app.py CHANGED
@@ -13,7 +13,7 @@ import re
13
 
14
  # --- LLM call (Azure OpenAI with API key) -----------------------------------
15
 
16
- def summarize_input(audio_b64: str = None, text_input: str = None, sys_prompt: str = None, user_prompt: str = None) -> str:
17
  """
18
  Calls Azure OpenAI Chat Completions with audio input (base64 mp3) or text input, or both.
19
  """
@@ -82,7 +82,9 @@ def summarize_input(audio_b64: str = None, text_input: str = None, sys_prompt: s
82
  {"role": "user", "content": content},
83
  ],
84
  )
85
- print(f"Azure API call at {datetime.now()}: prompt_length={len(user_prompt or '')}, "
 
 
86
  f"audio_size={len(audio_b64 or '')}, text_input_size={len(json_text or '')}")
87
  return response.choices[0].message.content
88
 
@@ -132,7 +134,11 @@ def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
132
  tmp_to_cleanup = []
133
  audio_b64 = None
134
  text_input = None
 
135
  try:
 
 
 
136
  audio_path = None
137
  if upload_path:
138
  audio_path = upload_path
@@ -142,23 +148,29 @@ def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
142
  # Check dns resolution of the url domain
143
  domain = Youtubetranscription_summarizer.extract_domain(url)
144
  if domain:
145
- Youtubetranscription_summarizer.nslookup(domain) # Check DNS resolution of the domain
146
- #Check if it's a youtube url
147
- CheckURL = re.search(r"Youtube", url, re.IGNORECASE)
148
 
149
- if CheckURL:
150
- # Get the transcription from youtube
151
- text_input = Youtubetranscription_summarizer.main(url.strip()) # Youtube files are transcribed and summarized
152
- tmp_to_cleanup.append(text_input)
153
- else:
154
- audio_path = download_to_temp_mp3(url.strip())
155
- tmp_to_cleanup.append(audio_path)
 
 
 
 
 
 
156
  if not audio_path and text_input is None:
157
  return "Please provide content via upload, recording, or URL."
158
  # If we have an audio file, encode it
159
  if audio_path:
160
  audio_b64 = encode_audio_from_path(audio_path)
161
- return summarize_input(audio_b64, text_input, sys_prompt, user_prompt)
162
 
163
  except Exception as e:
164
  return print(f"Error processing audio at {datetime.now()}: prompt_length={len(user_prompt)}, audio_path={audio_path}: {str(e)}")
 
13
 
14
  # --- LLM call (Azure OpenAI with API key) -----------------------------------
15
 
16
+ def summarize_input(audio_b64: str = None, text_input: str = None, sys_prompt: str = None, user_prompt: str = None, Starttime: datetime = None) -> str:
17
  """
18
  Calls Azure OpenAI Chat Completions with audio input (base64 mp3) or text input, or both.
19
  """
 
82
  {"role": "user", "content": content},
83
  ],
84
  )
85
+ Enddate = datetime.now()
86
+ Callduration = Enddate - Starttime[0]
87
+ print(f"Azure API call with a duration of {Callduration}: prompt_length={len(user_prompt or '')}, "
88
  f"audio_size={len(audio_b64 or '')}, text_input_size={len(json_text or '')}")
89
  return response.choices[0].message.content
90
 
 
134
  tmp_to_cleanup = []
135
  audio_b64 = None
136
  text_input = None
137
+ domaincheck = None
138
  try:
139
+ # Capture start time for logging
140
+ Starttime = datetime.now(),
141
+ print(f"Azure API call starts at {datetime.now()}"),
142
  audio_path = None
143
  if upload_path:
144
  audio_path = upload_path
 
148
  # Check dns resolution of the url domain
149
  domain = Youtubetranscription_summarizer.extract_domain(url)
150
  if domain:
151
+ domaincheck = Youtubetranscription_summarizer.nslookup(domain) # Check DNS resolution of the domain
152
+ else:
153
+ return "Invalid URL format."
154
 
155
+ if domaincheck:
156
+ # Check if the url is a youtube link
157
+ CheckURL = re.search(r"Youtube", url, re.IGNORECASE)
158
+
159
+ if CheckURL:
160
+ # Get the transcription from youtube
161
+ text_input = Youtubetranscription_summarizer.main(url.strip()) # Youtube files are transcribed and summarized
162
+ tmp_to_cleanup.append(text_input)
163
+ else:
164
+ audio_path = download_to_temp_mp3(url.strip())
165
+ tmp_to_cleanup.append(audio_path)
166
+ else:
167
+ return f"DNS lookup failed for {domain}"
168
  if not audio_path and text_input is None:
169
  return "Please provide content via upload, recording, or URL."
170
  # If we have an audio file, encode it
171
  if audio_path:
172
  audio_b64 = encode_audio_from_path(audio_path)
173
+ return summarize_input(audio_b64, text_input, sys_prompt, user_prompt, Starttime)
174
 
175
  except Exception as e:
176
  return print(f"Error processing audio at {datetime.now()}: prompt_length={len(user_prompt)}, audio_path={audio_path}: {str(e)}")