sarim committed on
Commit
ec315ba
·
1 Parent(s): c56da52
Files changed (2) hide show
  1. app.py +58 -40
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,5 +1,11 @@
1
  from fastapi import FastAPI
2
- from bs4 import BeautifulSoup, Tag
 
 
 
 
 
 
3
  import requests
4
 
5
  app = FastAPI(
@@ -8,49 +14,61 @@ app = FastAPI(
8
  description="Scrape data from PSX website"
9
  )
10
 
11
- def simple_dividend_extraction():
12
- """
13
- Simple function to extract dividend data
14
- """
15
- r = requests.get('https://sarmaaya.pk/announcements?cat=payouts')
16
- print("request done")
17
-
18
- soup = BeautifulSoup(r.text, 'html.parser')
19
- table = soup.find('table', class_ = 'caption-bottom w-full min-w-full text-xs sm:text-sm')
20
- print(table)
21
 
22
- results = []
 
23
 
24
- if table:
25
- print("in table")
 
 
 
 
 
 
 
 
 
26
 
27
- #rows = table.find('tbody').findChildren('tr')
28
- rows = table.find_all('tr')[1:]
29
- print(f"Number of rows found: {len(rows)}")
30
 
31
- for row in rows:
32
- if not hasattr(row, 'find_all'):
33
- print("no data")
34
- continue
35
- print(f"\nRow type: {type(row)}")
36
- print(f"Row contents: {row}")
37
- print("in row")
38
- cells = row.find_all('td')
39
 
40
- if len(cells) >= 6:
41
- company_name = cells[1].get_text(strip=True)
42
- dividend_amount = cells[2].get_text(strip=True) or "No dividend"
43
- dividend_date = cells[5].get_text(strip=True) or "No date"
44
- print("adding data")
45
-
46
- results.append({
47
- 'Company': company_name,
48
- 'Dividend': dividend_amount,
49
- 'Date': dividend_date
50
- })
51
- print(results)
52
-
53
- return results
 
 
 
 
 
 
 
 
54
 
55
  # Usage example for the simple version
56
  # dividend_info = simple_dividend_extraction('text.html')
@@ -64,7 +82,7 @@ def greet_json():
64
 
65
  @app.get("/dividend_history")
66
  def get_dividend():
67
- return simple_dividend_extraction()
68
 
69
 
70
 
 
1
  from fastapi import FastAPI
2
+ from selenium import webdriver
3
+ from selenium.webdriver.chrome.options import Options
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.support.ui import WebDriverWait
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from bs4 import BeautifulSoup
8
+ import time
9
  import requests
10
 
11
  app = FastAPI(
 
14
  description="Scrape data from PSX website"
15
  )
16
 
17
+
18
def selenium_dividend_extraction():
    """Scrape the upcoming-payouts table from scstrade.com.

    Drives a headless Chrome browser because the grid (``id="list"``) is
    rendered by JavaScript after page load, so a plain ``requests.get()``
    would never see its rows.

    Returns:
        list[dict]: one record per announcement with keys
        ``'Company_Code'``, ``'Company_Name'``, ``'Dividend'`` and
        ``'Expiry_Date'``. Empty list if the page, the grid, or the
        browser fails — this endpoint is deliberately best-effort.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # run without a display
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get('https://scstrade.com/MarketStatistics/MS_xDates.aspx')
        # Wait (up to 10 s) for the JS-rendered grid to appear before
        # reading the page source; the table does not exist in the
        # initial HTML.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "list"))
        )
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        return _parse_dividend_rows(soup.find('table', id='list'))
    except Exception as e:
        # Best-effort: log the failure and return an empty payload so the
        # API endpoint stays up instead of propagating a 500.
        print(f"Error: {e}")
        return []
    finally:
        # Always release the browser process, even on error.
        driver.quit()


def _parse_dividend_rows(table):
    """Extract dividend records from the jqGrid table element.

    Args:
        table: the ``<table id="list">`` bs4 Tag, or ``None`` if the
            grid was not found in the page.

    Returns:
        list[dict]: parsed rows; empty when ``table`` is ``None`` or has
        no data rows.
    """
    if table is None:
        return []
    # 'tr.jqgrow' selects only jqGrid data rows, skipping the grid's
    # header/placeholder rows.
    rows = table.select('tr.jqgrow')
    results = []
    for row in rows:
        cells = row.find_all('td')
        if len(cells) < 6:
            continue  # malformed or placeholder row — not enough columns
        results.append({
            'Company_Code': cells[0].get_text(strip=True),
            'Company_Name': cells[1].get_text(strip=True),
            'Dividend': cells[2].get_text(strip=True) or "No dividend",
            'Expiry_Date': cells[5].get_text(strip=True) or "No date",
        })
    return results
72
 
73
  # Usage example for the simple version
74
  # dividend_info = simple_dividend_extraction('text.html')
 
82
 
83
@app.get("/dividend_history")
def get_dividend():
    """Serve the scraped dividend announcements as a JSON list."""
    announcements = selenium_dividend_extraction()
    return announcements
86
 
87
 
88
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  fastapi
2
  uvicorn
3
  requests
4
- beautifulsoup4
 
 
1
  fastapi
2
  uvicorn
3
  requests
4
+ beautifulsoup4
5
+ selenium