KarthikMuraliM committed on
Commit
04090ba
·
1 Parent(s): 74392da

added pandas and numpy

Browse files
Files changed (2) hide show
  1. requirements.txt +3 -0
  2. tools.py +14 -1
requirements.txt CHANGED
@@ -5,3 +5,6 @@ openai
5
  requests # Add this line for making HTTP requests
6
  beautifulsoup4 # Add this line for parsing HTML
7
  lxml # Add this line, it's a fast parser for BeautifulSoup
 
 
 
 
5
  requests # Add this line for making HTTP requests
6
  beautifulsoup4 # Add this line for parsing HTML
7
  lxml # Add this line, it's a fast parser for BeautifulSoup
8
+ pandas
9
+ numpy
10
+ # playwright
tools.py CHANGED
@@ -35,4 +35,17 @@ def scrape_url_to_dataframe(url: str) -> (pd.DataFrame | str):
35
  except requests.exceptions.RequestException as e:
36
  return f"Error fetching URL: {e}"
37
  except Exception as e:
38
- return f"An unexpected error occurred: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  except requests.exceptions.RequestException as e:
36
  return f"Error fetching URL: {e}"
37
  except Exception as e:
38
+ return f"An unexpected error occurred: {e}"
39
+
40
+
41
+ # from playwright.sync_api import sync_playwright
42
+
43
+ # def scrape_dynamic_url(url: str) -> str:
44
+ # """Scrapes a dynamic URL using Playwright and returns the final HTML."""
45
+ # with sync_playwright() as p:
46
+ # browser = p.chromium.launch()
47
+ # page = browser.new_page()
48
+ # page.goto(url, wait_until='networkidle') # Wait for network activity to cease
49
+ # html_content = page.content()
50
+ # browser.close()
51
+ # return html_content