sarim committed on
Commit
1dd03e1
·
1 Parent(s): ec315ba
Files changed (1) hide show
  1. app.py +42 -56
app.py CHANGED
@@ -1,9 +1,4 @@
1
  from fastapi import FastAPI
2
- from selenium import webdriver
3
- from selenium.webdriver.chrome.options import Options
4
- from selenium.webdriver.common.by import By
5
- from selenium.webdriver.support.ui import WebDriverWait
6
- from selenium.webdriver.support import expected_conditions as EC
7
  from bs4 import BeautifulSoup
8
  import time
9
  import requests
@@ -14,61 +9,52 @@ app = FastAPI(
14
  description="Scrape data from PSX website"
15
  )
16
 
17
-
18
- def selenium_dividend_extraction():
19
- # Set up Chrome options
20
- chrome_options = Options()
21
- chrome_options.add_argument("--headless") # Run in background
22
- chrome_options.add_argument("--no-sandbox")
23
- chrome_options.add_argument("--disable-dev-shm-usage")
24
 
25
- # Initialize the driver
26
- driver = webdriver.Chrome(options=chrome_options)
27
 
28
- try:
29
- driver.get('https://scstrade.com/MarketStatistics/MS_xDates.aspx')
30
- print("Page loaded")
31
-
32
- # Wait for the table to load (adjust timeout as needed)
33
- wait = WebDriverWait(driver, 10)
34
- wait.until(EC.presence_of_element_located((By.ID, "list")))
35
-
36
- # Get the page source after JavaScript execution
37
- page_source = driver.page_source
38
- soup = BeautifulSoup(page_source, 'html.parser')
39
 
40
- table = soup.find('table', id='list')
41
- print(table)
42
- results = []
43
 
44
- if table:
45
- print("Table found with Selenium")
46
- # Use CSS selector to get data rows
47
- rows = table.select('tr.jqgrow')
48
- print(f"Number of data rows found: {len(rows)}")
 
 
 
49
 
50
- for row in rows:
51
- cells = row.find_all('td')
52
- if len(cells) >= 6:
53
- company_code = cells[0].get_text(strip=True)
54
- company_name = cells[1].get_text(strip=True)
55
- dividend_amount = cells[2].get_text(strip=True) or "No dividend"
56
- expiry_date = cells[5].get_text(strip=True) or "No date"
57
-
58
- results.append({
59
- 'Company_Code': company_code,
60
- 'Company_Name': company_name,
61
- 'Dividend': dividend_amount,
62
- 'Expiry_Date': expiry_date
63
- })
64
-
65
- return results
66
-
67
- except Exception as e:
68
- print(f"Error: {e}")
69
- return []
70
- finally:
71
- driver.quit()
72
 
73
  # Usage example for the simple version
74
  # dividend_info = simple_dividend_extraction('text.html')
@@ -82,7 +68,7 @@ def greet_json():
82
 
83
@app.get("/dividend_history")
def get_dividend():
    """API endpoint: return scraped dividend data (Selenium-based scraper)."""
    return selenium_dividend_extraction()
86
 
87
 
88
 
 
1
  from fastapi import FastAPI
 
 
 
 
 
2
  from bs4 import BeautifulSoup
3
  import time
4
  import requests
 
9
  description="Scrape data from PSX website"
10
  )
11
 
12
def simple_dividend_extraction():
    """Fetch the PSX financial-announcements page and extract dividend rows.

    Downloads the page with a plain HTTP GET (no JavaScript execution) and
    parses the first HTML table found.

    Returns:
        list[dict]: one dict per data row with keys 'Company', 'Dividend',
        'Date'; an empty list when no table is present.
    """
    # A timeout is essential: without it a slow/unreachable PSX server
    # would hang this request (and the API endpoint) indefinitely.
    r = requests.get(
        'https://www.psx.com.pk/psx/announcement/financial-announcements',
        timeout=30,
    )

    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.find('table')

    results = []
    if table:
        # Skip the header row; every remaining <tr> is a candidate data row.
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            # Rows with fewer than 6 cells (separators, notices) are skipped.
            if len(cells) >= 6:
                results.append({
                    'Company': cells[1].get_text(strip=True),
                    'Dividend': cells[2].get_text(strip=True) or "No dividend",
                    'Date': cells[5].get_text(strip=True) or "No date",
                })
    return results
 
 
 
 
 
 
 
 
58
 
59
  # Usage example for the simple version
60
  # dividend_info = simple_dividend_extraction('text.html')
 
68
 
69
  @app.get("/dividend_history")
70
  def get_dividend():
71
+ return simple_dividend_extraction()
72
 
73
 
74