Update app.py: send a browser User-Agent header with the crawl request
Browse files
app.py
CHANGED
|
@@ -39,7 +39,10 @@ def extract_links_and_text(base_url, max_depth=1, visited=None):
|
|
| 39 |
visited.add(base_url)
|
| 40 |
print(f"🔗 Crawling: {base_url}")
|
| 41 |
try:
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
response.raise_for_status()
|
| 44 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 45 |
page_text = ' '.join([p.get_text() for p in soup.find_all(['p', 'h1', 'h2', 'h3'])])
|
|
|
|
| 39 |
visited.add(base_url)
|
| 40 |
print(f"🔗 Crawling: {base_url}")
|
| 41 |
try:
|
| 42 |
+
headers = {
|
| 43 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
|
| 44 |
+
}
|
| 45 |
+
response = requests.get(base_url, headers=headers, timeout=10)
|
| 46 |
response.raise_for_status()
|
| 47 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 48 |
page_text = ' '.join([p.get_text() for p in soup.find_all(['p', 'h1', 'h2', 'h3'])])
|