# pix / app.py
# sarim's picture
# add portfolio
# 58218b6
from fastapi import FastAPI
from bs4 import BeautifulSoup
import time
import requests
from fastapi.responses import HTMLResponse
from starlette.responses import FileResponse
# Application instance. docs_url="/" places the interactive API documentation
# at the root path instead of FastAPI's default location.
app = FastAPI(
    description="Scrape data from PSX website",
    docs_url="/",
    title="PSX web scraper",
)
def simple_dividend_extraction():
    """
    Fetch the PSX financial-announcements page and extract its table rows.

    Downloads the page, takes the first <table> in the document, skips the
    first <tr> (treated as the header row) and converts every remaining row
    that has enough <td> cells into a dict.

    Returns:
        A list of dicts with the keys 'Company', 'Dividend', 'Date',
        'BoardMeeting', 'Eps', 'profitLossBeforeTax', 'profitLossAfterTax'
        and 'yearEnded'. Each value is the stripped text of one cell; an
        empty cell (other than the company cell) is replaced by a
        placeholder string. The list is empty when the page has no table.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
    """
    # The highest cell index read below is 8, so a usable row needs at least
    # 9 <td> cells; accepting shorter rows would raise IndexError.
    min_cells = 9

    # A timeout keeps a stalled upstream server from blocking the request
    # handler indefinitely (requests waits forever when none is given).
    r = requests.get(
        'https://www.psx.com.pk/psx/announcement/financial-announcements',
        timeout=30,
    )
    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.find('table')

    results = []
    if table is None:
        return results

    # Skip the first <tr>, which is treated as the header row.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < min_cells:
            continue

        # Column positions are hard-coded.
        # NOTE(review): confirm they still match the live page layout.
        text = [cell.get_text(strip=True) for cell in cells[:min_cells]]
        results.append({
            'Company': text[0],
            'Dividend': text[3] or "No dividend",
            'Date': text[8] or "No date",
            'BoardMeeting': text[7] or "No meeting",
            "Eps": text[6] or "No eps",
            'profitLossBeforeTax': text[4] or "No profit/loss",
            'profitLossAfterTax': text[5] or "No profit/loss",
            # NOTE(review): this placeholder looks copy-pasted from the
            # profit/loss fields; kept unchanged so API output stays stable.
            "yearEnded": text[2] or "No profit/loss",
        })
    return results
@app.get("/hello")
def greet_json():
    """
    Return a fixed greeting payload.
    """
    greeting = {"Hello": "World!"}
    return greeting
@app.get("/dividend_history")
def get_dividend():
    """
    Return the records produced by simple_dividend_extraction().
    """
    records = simple_dividend_extraction()
    return records
@app.get("/PrivacyPolicy")
def get_privacy_policy():
    """
    Respond with the file 'text.html', given as a relative path.
    """
    policy_page = 'text.html'
    return FileResponse(policy_page)
@app.get("/portfolio")
def get_portfolio():
    """
    Respond with the file 'portfolio.html', given as a relative path.
    """
    portfolio_page = 'portfolio.html'
    return FileResponse(portfolio_page)