Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,8 +36,6 @@ load_dotenv()
|
|
| 36 |
|
| 37 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 38 |
BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
|
| 39 |
-
# Web Unlocker zone name (set in HF Secrets as BRIGHTDATA_UNLOCKER_ZONE; default goodreads_unlocker)
|
| 40 |
-
BRIGHTDATA_UNLOCKER_ZONE = os.getenv("BRIGHTDATA_UNLOCKER_ZONE", "goodreads_unlocker")
|
| 41 |
|
| 42 |
if not GROQ_API_KEY:
|
| 43 |
raise ValueError("GROQ_API_KEY is not set.")
|
|
@@ -71,7 +69,7 @@ def scrape_website(url: str):
|
|
| 71 |
"Content-Type": "application/json",
|
| 72 |
"Host": "api.brightdata.com",
|
| 73 |
}
|
| 74 |
-
payload = {"zone":
|
| 75 |
|
| 76 |
try:
|
| 77 |
resp = requests.post(
|
|
@@ -146,9 +144,10 @@ def _fetch_transcript_via_brightdata(video_id: str) -> str:
|
|
| 146 |
"Content-Type": "application/json",
|
| 147 |
"Host": "api.brightdata.com",
|
| 148 |
}
|
| 149 |
-
#
|
|
|
|
| 150 |
watch_url = f"https://www.youtube.com/watch?v={video_id}"
|
| 151 |
-
payload = {"zone":
|
| 152 |
resp = requests.post(api_url, json=payload, headers=headers, timeout=120, verify=False)
|
| 153 |
resp.raise_for_status()
|
| 154 |
html = resp.text
|
|
@@ -178,7 +177,7 @@ def _fetch_transcript_via_brightdata(video_id: str) -> str:
|
|
| 178 |
if not base_url:
|
| 179 |
raise ValueError("No caption track URL found.")
|
| 180 |
caption_url = base_url + ("&" if "?" in base_url else "?") + "fmt=json3"
|
| 181 |
-
payload2 = {"zone":
|
| 182 |
resp2 = requests.post(api_url, json=payload2, headers=headers, timeout=60, verify=False)
|
| 183 |
resp2.raise_for_status()
|
| 184 |
caption_data = resp2.json()
|
|
@@ -252,7 +251,7 @@ def fetch_youtube_transcript(video_input: str):
|
|
| 252 |
contexts["youtube"] = ""
|
| 253 |
return (
|
| 254 |
f"Direct fetch failed (network restricted). Bright Data fallback also failed: {fallback_err}. "
|
| 255 |
-
"Ensure BRIGHTDATA_API_KEY and
|
| 256 |
"",
|
| 257 |
)
|
| 258 |
contexts["youtube"] = ""
|
|
|
|
| 36 |
|
| 37 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 38 |
BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
|
|
|
|
|
|
|
| 39 |
|
| 40 |
if not GROQ_API_KEY:
|
| 41 |
raise ValueError("GROQ_API_KEY is not set.")
|
|
|
|
| 69 |
"Content-Type": "application/json",
|
| 70 |
"Host": "api.brightdata.com",
|
| 71 |
}
|
| 72 |
+
payload = {"zone": "goodreads_unlocker", "url": target_url, "format": "raw"}
|
| 73 |
|
| 74 |
try:
|
| 75 |
resp = requests.post(
|
|
|
|
| 144 |
"Content-Type": "application/json",
|
| 145 |
"Host": "api.brightdata.com",
|
| 146 |
}
|
| 147 |
+
# Try web_unlocker (generic) or youtube_unlocker; user may need to create zone in Bright Data
|
| 148 |
+
zone = os.getenv("YOUTUBE_UNLOCKER_ZONE", "web_unlocker")
|
| 149 |
watch_url = f"https://www.youtube.com/watch?v={video_id}"
|
| 150 |
+
payload = {"zone": zone, "url": watch_url, "format": "raw"}
|
| 151 |
resp = requests.post(api_url, json=payload, headers=headers, timeout=120, verify=False)
|
| 152 |
resp.raise_for_status()
|
| 153 |
html = resp.text
|
|
|
|
| 177 |
if not base_url:
|
| 178 |
raise ValueError("No caption track URL found.")
|
| 179 |
caption_url = base_url + ("&" if "?" in base_url else "?") + "fmt=json3"
|
| 180 |
+
payload2 = {"zone": zone, "url": caption_url, "format": "raw"}
|
| 181 |
resp2 = requests.post(api_url, json=payload2, headers=headers, timeout=60, verify=False)
|
| 182 |
resp2.raise_for_status()
|
| 183 |
caption_data = resp2.json()
|
|
|
|
| 251 |
contexts["youtube"] = ""
|
| 252 |
return (
|
| 253 |
f"Direct fetch failed (network restricted). Bright Data fallback also failed: {fallback_err}. "
|
| 254 |
+
"Ensure BRIGHTDATA_API_KEY is set and you have a 'web_unlocker' zone in Bright Data.",
|
| 255 |
"",
|
| 256 |
)
|
| 257 |
contexts["youtube"] = ""
|