|
|
from fastapi import FastAPI |
|
|
from bs4 import BeautifulSoup |
|
|
import time |
|
|
import requests |
|
|
from fastapi.responses import HTMLResponse |
|
|
from starlette.responses import FileResponse |
|
|
|
|
|
# Application instance. docs_url="/" asks FastAPI to serve its interactive
# documentation page at the site root instead of the default docs path.
app = FastAPI(
    description="Scrape data from PSX website",
    title="PSX web scraper",
    docs_url="/",
)
|
|
|
|
|
|
|
|
def _cell_text(cell, fallback):
    """Return the whitespace-stripped text of *cell*, or *fallback* if empty."""
    return cell.get_text(strip=True) or fallback


def simple_dividend_extraction(timeout: float = 30.0) -> list:
    """Scrape the PSX financial-announcements page into a list of records.

    Fetches the announcements page, takes the first ``<table>`` in the
    document, skips its first row (treated as the header) and turns every
    remaining row that has enough ``<td>`` cells into a dict.

    Args:
        timeout: Seconds to wait for the PSX server before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A list of dicts with the keys ``Company``, ``Dividend``, ``Date``,
        ``BoardMeeting``, ``Eps``, ``profitLossBeforeTax``,
        ``profitLossAfterTax`` and ``yearEnded``. Empty cells (other than the
        company name) are replaced by a placeholder string. The list is empty
        when the page contains no table.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # The highest cell index read below is 8, so a row needs at least nine
    # cells; shorter rows are skipped instead of raising IndexError.
    min_cells = 9

    response = requests.get(
        'https://www.psx.com.pk/psx/announcement/financial-announcements',
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table')
    if table is None:
        return []

    results = []
    # [1:] drops the first <tr>, which is treated as the header row.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < min_cells:
            continue

        results.append({
            'Company': cells[0].get_text(strip=True),
            'Dividend': _cell_text(cells[3], "No dividend"),
            'Date': _cell_text(cells[8], "No date"),
            'BoardMeeting': _cell_text(cells[7], "No meeting"),
            "Eps": _cell_text(cells[6], "No eps"),
            'profitLossBeforeTax': _cell_text(cells[4], "No profit/loss"),
            'profitLossAfterTax': _cell_text(cells[5], "No profit/loss"),
            # NOTE(review): this placeholder looks copy-pasted from the
            # profit/loss fields; kept as-is because API clients may rely on
            # the exact string. Confirm before changing.
            "yearEnded": _cell_text(cells[2], "No profit/loss"),
        })

    return results
|
|
|
|
|
|
|
|
@app.get("/hello")
def greet_json():
    """Return a fixed greeting payload for the ``/hello`` route."""
    greeting = {"Hello": "World!"}
    return greeting
|
|
|
|
|
|
|
|
@app.get("/dividend_history")
def get_dividend():
    """Return the records produced by ``simple_dividend_extraction``."""
    records = simple_dividend_extraction()
    return records
|
|
|
|
|
@app.get("/PrivacyPolicy")
def get_privacy_policy():
    """Serve the ``text.html`` file for the ``/PrivacyPolicy`` route."""
    policy_page = FileResponse('text.html')
    return policy_page
|
|
|
|
|
@app.get("/portfolio")
def get_portfolio():
    """Serve the ``portfolio.html`` file for the ``/portfolio`` route."""
    portfolio_page = FileResponse('portfolio.html')
    return portfolio_page
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|