Ansnaeem committed on
Commit
155e009
·
verified ·
1 Parent(s): e628cf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -36,8 +36,6 @@ load_dotenv()
36
 
37
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
38
  BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
39
- # Web Unlocker zone name (set in HF Secrets as BRIGHTDATA_UNLOCKER_ZONE; default goodreads_unlocker)
40
- BRIGHTDATA_UNLOCKER_ZONE = os.getenv("BRIGHTDATA_UNLOCKER_ZONE", "goodreads_unlocker")
41
 
42
  if not GROQ_API_KEY:
43
  raise ValueError("GROQ_API_KEY is not set.")
@@ -71,7 +69,7 @@ def scrape_website(url: str):
71
  "Content-Type": "application/json",
72
  "Host": "api.brightdata.com",
73
  }
74
- payload = {"zone": BRIGHTDATA_UNLOCKER_ZONE, "url": target_url, "format": "raw"}
75
 
76
  try:
77
  resp = requests.post(
@@ -146,9 +144,10 @@ def _fetch_transcript_via_brightdata(video_id: str) -> str:
146
  "Content-Type": "application/json",
147
  "Host": "api.brightdata.com",
148
  }
149
- # Use same Web Unlocker zone as scraper (from HF Secret BRIGHTDATA_UNLOCKER_ZONE)
 
150
  watch_url = f"https://www.youtube.com/watch?v={video_id}"
151
- payload = {"zone": BRIGHTDATA_UNLOCKER_ZONE, "url": watch_url, "format": "raw"}
152
  resp = requests.post(api_url, json=payload, headers=headers, timeout=120, verify=False)
153
  resp.raise_for_status()
154
  html = resp.text
@@ -178,7 +177,7 @@ def _fetch_transcript_via_brightdata(video_id: str) -> str:
178
  if not base_url:
179
  raise ValueError("No caption track URL found.")
180
  caption_url = base_url + ("&" if "?" in base_url else "?") + "fmt=json3"
181
- payload2 = {"zone": BRIGHTDATA_UNLOCKER_ZONE, "url": caption_url, "format": "raw"}
182
  resp2 = requests.post(api_url, json=payload2, headers=headers, timeout=60, verify=False)
183
  resp2.raise_for_status()
184
  caption_data = resp2.json()
@@ -252,7 +251,7 @@ def fetch_youtube_transcript(video_input: str):
252
  contexts["youtube"] = ""
253
  return (
254
  f"Direct fetch failed (network restricted). Bright Data fallback also failed: {fallback_err}. "
255
- "Ensure BRIGHTDATA_API_KEY and BRIGHTDATA_UNLOCKER_ZONE are set in HF Secrets.",
256
  "",
257
  )
258
  contexts["youtube"] = ""
 
36
 
37
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
38
  BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
 
 
39
 
40
  if not GROQ_API_KEY:
41
  raise ValueError("GROQ_API_KEY is not set.")
 
69
  "Content-Type": "application/json",
70
  "Host": "api.brightdata.com",
71
  }
72
+ payload = {"zone": "goodreads_unlocker", "url": target_url, "format": "raw"}
73
 
74
  try:
75
  resp = requests.post(
 
144
  "Content-Type": "application/json",
145
  "Host": "api.brightdata.com",
146
  }
147
+ # Try web_unlocker (generic) or youtube_unlocker; user may need to create zone in Bright Data
148
+ zone = os.getenv("YOUTUBE_UNLOCKER_ZONE", "web_unlocker")
149
  watch_url = f"https://www.youtube.com/watch?v={video_id}"
150
+ payload = {"zone": zone, "url": watch_url, "format": "raw"}
151
  resp = requests.post(api_url, json=payload, headers=headers, timeout=120, verify=False)
152
  resp.raise_for_status()
153
  html = resp.text
 
177
  if not base_url:
178
  raise ValueError("No caption track URL found.")
179
  caption_url = base_url + ("&" if "?" in base_url else "?") + "fmt=json3"
180
+ payload2 = {"zone": zone, "url": caption_url, "format": "raw"}
181
  resp2 = requests.post(api_url, json=payload2, headers=headers, timeout=60, verify=False)
182
  resp2.raise_for_status()
183
  caption_data = resp2.json()
 
251
  contexts["youtube"] = ""
252
  return (
253
  f"Direct fetch failed (network restricted). Bright Data fallback also failed: {fallback_err}. "
254
+ "Ensure BRIGHTDATA_API_KEY is set and you have a 'web_unlocker' zone in Bright Data.",
255
  "",
256
  )
257
  contexts["youtube"] = ""