sarim committed on
Commit
af8ef33
·
1 Parent(s): 9c6d4b6

get dividend history

Browse files
Files changed (2) hide show
  1. app.py +47 -3
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,11 +1,55 @@
1
  from fastapi import FastAPI
 
 
2
 
3
  app = FastAPI(
4
  title="PSX web scraper",
5
- docs_url="/",
6
- description="Scrape data from psx website"
7
  )
8
 
9
  @app.get("/hello")
10
  def greet_json():
11
- return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from bs4 import BeautifulSoup, Tag
3
+ import requests
4
 
5
# FastAPI application object.
# docs_url="/" serves the interactive Swagger UI at the site root instead of /docs.
app = FastAPI(
    description="Scrape data from PSX website",
    docs_url="/",
    title="PSX web scraper",
)
10
 
11
@app.get("/hello")
def greet_json():
    """Static health-check endpoint; always returns the same greeting."""
    greeting = {"Hello": "World!"}
    return greeting
14
+
15
+
16
@app.get("/dividend_history")
def get_dividend():
    """Scrape the PSX financial-announcements page for dividend rows.

    Returns:
        list[dict]: one ``{"date", "company", "dividend_amount"}`` entry per
        announcement row, or a ``{"error": ...}`` dict when the page cannot
        be fetched or contains no table (matching the endpoint's existing
        error-dict convention).
    """
    url = "https://www.psx.com.pk/psx/announcement/financial-announcements"
    try:
        # Timeout prevents a slow/hung PSX server from blocking the worker
        # forever; raise_for_status surfaces 4xx/5xx responses explicitly.
        r = requests.get(url, timeout=15)
        r.raise_for_status()
    except requests.RequestException as exc:
        return {"error": f"Failed to fetch PSX page: {exc}"}

    soup = BeautifulSoup(r.text, "html.parser")

    table = soup.find("table")
    if table is None:
        return {"error": "No table found on PSX page"}

    rows = table.find_all("tr")
    data = []
    current_date = None

    for row in rows:
        # Skip non-tag elements to avoid "NavigableString" errors
        if not isinstance(row, Tag):
            continue

        # A row containing an <h4> is a date section header, not a data row;
        # remember its text and apply it to the rows that follow.
        header = row.find("h4")
        if header:
            current_date = header.get_text(strip=True)
            continue

        # Regular rows: need at least 4 cells (company ... dividend amount).
        cols = row.find_all("td")
        if len(cols) < 4:
            continue

        data.append({
            "date": current_date,
            "company": cols[0].get_text(strip=True),
            # NOTE(review): column index 3 assumed to hold the dividend
            # amount on the PSX layout — verify against the live page markup.
            "dividend_amount": cols[3].get_text(strip=True),
        })

    return data
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  fastapi
2
- uvicorn
 
 
 
1
  fastapi
2
+ uvicorn
3
+ requests
4
+ beautifulsoup4