sarim committed on
Commit
d4b7c04
·
1 Parent(s): ea765f8

company details

Browse files
Files changed (3) hide show
  1. app.py +9 -3
  2. models.py +97 -3
  3. psx_scraper.py +237 -0
app.py CHANGED
@@ -11,6 +11,7 @@ from models import PsxMarketResponse,PsxStock
11
  from threading import Thread
12
  from datetime import datetime
13
  import re
 
14
 
15
 
16
  CACHE = {
@@ -389,7 +390,12 @@ def get_gainers_loosers():
389
 
390
  return CACHE["gainers"]
391
 
392
- @app.get("/get_announcements")
393
- def get_announcements():
394
 
395
- return get_announcements_scrap()
 
 
 
 
 
 
11
  from threading import Thread
12
  from datetime import datetime
13
  import re
14
+ from psx_scraper import PsxScraper
15
 
16
 
17
  CACHE = {
 
390
 
391
  return CACHE["gainers"]
392
 
393
@app.get("/get_symbol_detail{symbol}")
def get_announcements(symbol: str):
    """Fetch the PSX company page for ``symbol`` and return its scraped data.

    The page at ``https://dps.psx.com.pk/company/<symbol>`` is downloaded and
    handed to ``PsxScraper``; the value returned is whatever
    ``PsxScraper.scrape_all_data()`` produces.

    Args:
        symbol: Company symbol taken from the request path.

    Raises:
        requests.exceptions.RequestException: If the upstream request fails
            or does not answer within the timeout.
    """
    from urllib.parse import quote

    # NOTE(review): the route template has no "/" before {symbol}, so the
    # symbol is matched directly after "get_symbol_detail". Left unchanged
    # so existing clients keep working -- confirm whether this is intended.

    # Escape the client-supplied symbol so it cannot inject extra path
    # segments, a query string or a fragment into the upstream URL.
    safe_symbol = quote(symbol, safe="")

    # Without a timeout, requests waits indefinitely on an unresponsive
    # upstream and the worker serving this call never becomes free again.
    r = requests.get(
        f'https://dps.psx.com.pk/company/{safe_symbol}',
        timeout=15,
    )

    scraper = PsxScraper(html_content=r.text)
    company_data = scraper.scrape_all_data()

    return company_data
models.py CHANGED
@@ -1,5 +1,6 @@
1
- from typing import Dict, List
2
- from pydantic import BaseModel
 
3
 
4
 
5
  class PsxStock(BaseModel):
@@ -16,4 +17,97 @@ class PsxStock(BaseModel):
16
 
17
 
18
  class PsxMarketResponse(BaseModel):
19
- sectors: Dict[str, List[PsxStock]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Optional
2
+ from pydantic import BaseModel, Field, validator
3
+ from datetime import datetime
4
 
5
 
6
  class PsxStock(BaseModel):
 
17
 
18
 
19
class PsxMarketResponse(BaseModel):
    """Response model holding lists of ``PsxStock`` grouped under string keys."""
    # Required mapping from a string key to a list of stocks
    # (presumably keyed by sector name -- confirm against the producer).
    sectors: Dict[str, List[PsxStock]]
21
+
22
+
23
+
24
class CircuitBreaker(BaseModel):
    """Lower limit, upper limit and current price, all required floats.

    Every field is populated from its camelCase alias.
    """
    lower_limit: float = Field(..., alias="lowerLimit")
    upper_limit: float = Field(..., alias="upperLimit")
    current_price: float = Field(..., alias="currentPrice")
28
+
29
class DayRange(BaseModel):
    """Low / high / current triple of required floats for a day range."""
    low: float
    high: float
    current: float
33
+
34
class YearRange(BaseModel):
    """Low / high / current triple of required floats for a year range."""
    low: float
    high: float
    current: float
38
+
39
class TradingStats(BaseModel):
    """Optional trading figures; every field defaults to ``None``.

    Fields declared with ``Field(None, alias=...)`` are populated from the
    alias shown; the remaining fields use their own name.
    """
    # Open / high / low / close prices, read from the short aliases.
    open_price: Optional[float] = Field(None, alias="open")
    high_price: Optional[float] = Field(None, alias="high")
    low_price: Optional[float] = Field(None, alias="low")
    close_price: Optional[float] = Field(None, alias="close")
    volume: Optional[int] = None
    # Ask / bid price and volume pairs.
    ask_price: Optional[float] = Field(None, alias="askPrice")
    ask_volume: Optional[int] = Field(None, alias="askVolume")
    bid_price: Optional[float] = Field(None, alias="bidPrice")
    bid_volume: Optional[int] = Field(None, alias="bidVolume")
    # NOTE(review): ldcp / var / haircut are exchange-specific figures whose
    # exact meaning and units are not visible here -- confirm with the source.
    ldcp: Optional[float] = None
    var: Optional[float] = None
    haircut: Optional[float] = None
    pe_ratio: Optional[float] = Field(None, alias="peRatio")
53
+
54
class QuoteData(BaseModel):
    """Quote for one company: identity, price, change and nested range/stat models.

    All fields are required except ``one_year_change`` and ``ytd_change``,
    which default to ``None``.
    """
    company_name: str = Field(..., alias="companyName")
    symbol: str
    sector: str
    current_price: float = Field(..., alias="currentPrice")
    change: float
    change_percent: float = Field(..., alias="changePercent")
    # Unlike the neighbouring nested models, this field declares no alias,
    # so it is populated under the name "circuit_breaker".
    circuit_breaker: CircuitBreaker
    day_range: DayRange = Field(..., alias="dayRange")
    year_range: YearRange = Field(..., alias="yearRange")
    trading_stats: TradingStats = Field(..., alias="tradingStats")
    one_year_change: Optional[float] = Field(None, alias="oneYearChange")
    ytd_change: Optional[float] = Field(None, alias="ytdChange")
67
+
68
class FinancialResult(BaseModel):
    """One announcement row: date and title plus up to two optional links."""
    # Date and title are stored as plain strings; no parsing is applied here.
    date: str
    title: str
    # Both links default to None and are populated from their camelCase aliases.
    document_link: Optional[str] = Field(None, alias="documentLink")
    pdf_link: Optional[str] = Field(None, alias="pdfLink")
73
+
74
class FinancialEntry(BaseModel):
    """Financial figures for one period; only ``period`` is required."""
    # Period label, kept as a string.
    period: str
    # Each figure defaults to None when it is not supplied.
    sales: Optional[float] = None
    profit_after_tax: Optional[float] = Field(None, alias="profitAfterTax")
    eps: Optional[float] = None
79
+
80
class Financials(BaseModel):
    """Two required lists of ``FinancialEntry``: annual and quarterly."""
    annual: List[FinancialEntry]
    quarterly: List[FinancialEntry]
83
+
84
class RatioEntry(BaseModel):
    """Ratio figures for one period; only ``period`` is required."""
    # Period label, kept as a string.
    period: str
    # Each ratio defaults to None when it is not supplied.
    gross_profit_margin: Optional[float] = Field(None, alias="grossProfitMargin")
    net_profit_margin: Optional[float] = Field(None, alias="netProfitMargin")
    eps_growth: Optional[float] = Field(None, alias="epsGrowth")
    peg: Optional[float] = None
90
+
91
class CompanyProfile(BaseModel):
    """Descriptive company information; every field is required."""
    business_description: str = Field(..., alias="businessDescription")
    # List of string-to-string dicts; the expected keys are not defined here.
    key_people: List[Dict[str, str]] = Field(..., alias="keyPeople")
    address: str
    website: str
    registrar: str
    auditor: str
    fiscal_year_end: str = Field(..., alias="fiscalYearEnd")
99
+
100
class EquityProfile(BaseModel):
    """Equity figures: market cap, share count and free float; all required."""
    market_cap: float = Field(..., alias="marketCap")
    shares: int
    # Free float is carried both as a unit count and as a percentage.
    free_float_units: int = Field(..., alias="freeFloatUnits")
    free_float_percent: float = Field(..., alias="freeFloatPercent")
105
+
106
class CompanyData(BaseModel):
    """Top-level container for scraped company data.

    Holds announcements, financials and ratios plus a creation timestamp.
    """
    # The quote / profile / equity fields below are currently disabled.
    # quote: QuoteData
    # profile: CompanyProfile
    # equity: EquityProfile
    announcements: List[FinancialResult]
    financials: Financials
    ratios: List[RatioEntry]
    # Defaults to datetime.now() evaluated when the model instance is created
    # (called without a tz argument, so the value is a naive datetime).
    timestamp: datetime = Field(default_factory=datetime.now)
psx_scraper.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import re
3
+ from models import FinancialResult,FinancialEntry,Financials,RatioEntry,CompanyData
4
+ from typing import List, Optional, Dict, Any
5
+
6
class PsxScraper(object):
    """Extract announcements, financials and ratios from company-page HTML.

    The HTML is parsed once in ``__init__``. Each ``extract_*`` method then
    looks up its own section of the parsed document and returns an empty
    result when that section (or its table) is missing.
    """

    # Ordered (label substring, attribute name) pairs. The first substring
    # found in a row label decides which attribute receives the value.
    _FINANCIAL_METRICS = (
        ('Sales', 'sales'),
        ('Profit after Taxation', 'profit_after_tax'),
        ('EPS', 'eps'),
    )
    _RATIO_METRICS = (
        ('Gross Profit Margin', 'gross_profit_margin'),
        ('Net Profit Margin', 'net_profit_margin'),
        ('EPS Growth', 'eps_growth'),
        ('PEG', 'peg'),
    )

    def __init__(self, html_content: str):
        """Parse ``html_content`` with BeautifulSoup's ``html.parser``."""
        self.soup = BeautifulSoup(html_content, 'html.parser')

    def _clean_number(self, text: str) -> float:
        """Convert a formatted number string to ``float``.

        Commas, spaces and a ``Rs.`` prefix are removed before the first
        numeric token is read. A value wrapped in parentheses, e.g.
        ``(1,234.50)``, is treated as negative.

        Returns:
            The parsed number, or ``0.0`` for empty or non-numeric input.
        """
        if not text:
            return 0.0
        cleaned = (
            str(text).replace(',', '').replace(' ', '').replace('Rs.', '').strip()
        )
        # Parentheses mark a negative figure; remember that before the regex
        # below discards them along with every other non-numeric character.
        parenthesised = cleaned.startswith('(') and cleaned.endswith(')')
        match = re.search(r'[-+]?\d*\.?\d+', cleaned)
        if not match:
            return 0.0
        value = float(match.group())
        if parenthesised and value > 0:
            value = -value
        return value

    def _extract_range(self, range_text: str) -> Dict[str, float]:
        """Split a ``"low — high"`` string into a low/high/current dict.

        Example input: ``"296.08 — 361.88"``. ``current`` is always ``0.0``
        here; the caller is expected to fill it in. Empty input, or text
        that does not split into exactly two parts on the em dash, yields
        all zeros.
        """
        parts = range_text.split('—') if range_text else []
        if len(parts) == 2:
            return {
                'low': self._clean_number(parts[0]),
                'high': self._clean_number(parts[1]),
                'current': 0.0,  # Will be set from data attributes
            }
        return {'low': 0.0, 'high': 0.0, 'current': 0.0}

    @staticmethod
    def _parse_table(table: Any) -> tuple:
        """Return ``(headers, rows)`` read from a ``<thead>``/``<tbody>`` table.

        ``headers`` is the list of stripped ``<th>`` texts from the first
        header row. ``rows`` is a list of stripped ``<td>`` text lists, and
        only rows with exactly one cell per header are kept. A table without
        a header row or a body yields ``([], [])``.
        """
        thead = table.find('thead')
        header_row = thead.find('tr') if thead is not None else None
        tbody = table.find('tbody')
        if header_row is None or tbody is None:
            return [], []

        headers = [th.text.strip() for th in header_row.find_all('th')]
        rows = []
        for tr in tbody.find_all('tr'):
            cells = [td.text.strip() for td in tr.find_all('td')]
            if len(cells) == len(headers):
                rows.append(cells)
        return headers, rows

    def _entries_from_table(self, table: Any, entry_factory: Any, metrics: Any) -> list:
        """Pivot a metric-by-period table into one entry per period column.

        The first column holds the metric label; every further column is a
        period. For each period an ``entry_factory(period=...)`` object is
        created, and for each row the first ``(substring, attribute)`` pair
        in ``metrics`` whose substring occurs in the label receives the
        cleaned cell value. No entries are produced when the table has no
        headers or no usable rows.
        """
        headers, rows = self._parse_table(table)
        if not headers or not rows:
            return []

        entries = []
        for col, period in enumerate(headers[1:], start=1):
            entry = entry_factory(period=period)
            for cells in rows:
                label = cells[0]
                for needle, attr in metrics:
                    if needle in label:
                        setattr(entry, attr, self._clean_number(cells[col]))
                        break
            entries.append(entry)
        return entries

    def _financial_tab_entries(self, section: Any, tab_name: str) -> list:
        """Return the entries of the ``tabs__panel`` named ``tab_name``."""
        panel = section.find('div', class_='tabs__panel', attrs={'data-name': tab_name})
        table = panel.find('table') if panel is not None else None
        if table is None:
            return []
        return self._entries_from_table(table, FinancialEntry, self._FINANCIAL_METRICS)

    def extract_announcements(self) -> List[FinancialResult]:
        """Extract financial results announcements.

        Reads the table inside the ``Financial Results`` tab panel, skipping
        its first row as the header. Rows with fewer than three cells are
        ignored. In the third cell, an ``href`` containing ``javascript:``
        becomes the document link and one containing ``.pdf`` the PDF link.
        """
        announcements = []

        panel = self.soup.find('div', class_='tabs__panel', attrs={'data-name': 'Financial Results'})
        table = panel.find('table') if panel is not None else None
        if table is None:
            return announcements

        for row in table.find_all('tr')[1:]:  # Skip header row
            cols = row.find_all('td')
            if len(cols) < 3:
                continue

            document_link = None
            pdf_link = None
            for link in cols[2].find_all('a'):
                href = link.get('href', '')
                if 'javascript:' in href:
                    document_link = href
                elif '.pdf' in href:
                    pdf_link = href

            announcements.append(FinancialResult(
                date=cols[0].text.strip(),
                title=cols[1].text.strip(),
                documentLink=document_link,
                pdfLink=pdf_link
            ))

        return announcements

    def extract_financials(self) -> Financials:
        """Extract financial data (annual and quarterly).

        Looks inside ``<div id="financials">`` for the ``Annual`` and
        ``Quarterly`` tab panels. A missing section, panel or table yields an
        empty list for that part.
        """
        section = self.soup.find('div', id='financials')
        if section is None:
            return Financials(annual=[], quarterly=[])

        return Financials(
            annual=self._financial_tab_entries(section, 'Annual'),
            quarterly=self._financial_tab_entries(section, 'Quarterly'),
        )

    def extract_ratios(self) -> List[RatioEntry]:
        """Extract financial ratios.

        Reads the first table inside ``<div id="ratios">`` and returns one
        ``RatioEntry`` per period column. Parenthesised values are stored as
        negative numbers.
        """
        section = self.soup.find('div', id='ratios')
        table = section.find('table') if section is not None else None
        if table is None:
            return []
        return self._entries_from_table(table, RatioEntry, self._RATIO_METRICS)

    def scrape_all_data(self) -> CompanyData:
        """Scrape all data and return as CompanyData object"""
        return CompanyData(
            announcements=self.extract_announcements(),
            financials=self.extract_financials(),
            ratios=self.extract_ratios()
        )
235
+
236
+
237
+