Spaces:
Sleeping
Sleeping
Commit ·
04090ba
1
Parent(s): 74392da
added pandas and numpy
Browse files
- requirements.txt +3 -0
- tools.py +14 -1
requirements.txt
CHANGED
|
@@ -5,3 +5,6 @@ openai
|
|
| 5 |
requests # Add this line for making HTTP requests
|
| 6 |
beautifulsoup4 # Add this line for parsing HTML
|
| 7 |
lxml # Add this line, it's a fast parser for BeautifulSoup
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
requests # Add this line for making HTTP requests
|
| 6 |
beautifulsoup4 # Add this line for parsing HTML
|
| 7 |
lxml # Add this line, it's a fast parser for BeautifulSoup
|
| 8 |
+
pandas
|
| 9 |
+
numpy
|
| 10 |
+
# playwright
|
tools.py
CHANGED
|
@@ -35,4 +35,17 @@ def scrape_url_to_dataframe(url: str) -> (pd.DataFrame | str):
|
|
| 35 |
except requests.exceptions.RequestException as e:
|
| 36 |
return f"Error fetching URL: {e}"
|
| 37 |
except Exception as e:
|
| 38 |
-
return f"An unexpected error occurred: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
except requests.exceptions.RequestException as e:
|
| 36 |
return f"Error fetching URL: {e}"
|
| 37 |
except Exception as e:
|
| 38 |
+
return f"An unexpected error occurred: {e}"
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# from playwright.sync_api import sync_playwright
|
| 42 |
+
|
| 43 |
+
# def scrape_dynamic_url(url: str) -> str:
|
| 44 |
+
# """Scrapes a dynamic URL using Playwright and returns the final HTML."""
|
| 45 |
+
# with sync_playwright() as p:
|
| 46 |
+
# browser = p.chromium.launch()
|
| 47 |
+
# page = browser.new_page()
|
| 48 |
+
# page.goto(url, wait_until='networkidle') # Wait for network activity to cease
|
| 49 |
+
# html_content = page.content()
|
| 50 |
+
# browser.close()
|
| 51 |
+
# return html_content
|