Spaces:

sarim
/

pix

Running

App Files Files Community

sarim committed on Jan 20

Commit

d4b7c04

1 Parent(s): ea765f8

company details

Browse files

Files changed (3) hide show

app.py +9 -3
models.py +97 -3
psx_scraper.py +237 -0

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from models import PsxMarketResponse,PsxStock
 from threading import Thread
 from datetime import datetime
 import re
 CACHE = {
@@ -389,7 +390,12 @@ def get_gainers_loosers():
     return CACHE["gainers"]
-@app.get("/get_announcements")
-def get_announcements():
-    return get_announcements_scrap()

 from threading import Thread
 from datetime import datetime
 import re
+from psx_scraper import PsxScraper
 CACHE = {
     return CACHE["gainers"]
+@app.get("/get_symbol_detail{symbol}")
+def get_announcements(symbol:str):
+    """Fetch the PSX company page for ``symbol`` and return its scraped data.
+
+    The page at ``https://dps.psx.com.pk/company/<symbol>`` is downloaded and
+    handed to ``PsxScraper``; the result of ``scrape_all_data()`` is returned.
+
+    Raises:
+        HTTPException: 502 when the PSX site cannot be reached, times out or
+            answers with an error status, so an upstream error page is not
+            parsed into an empty-looking result.
+    """
+    # NOTE(review): the route has no "/" before {symbol} and the function is
+    # still named get_announcements; both are kept so existing clients keep
+    # working - confirm whether "/get_symbol_detail/{symbol}" was intended.
+    from urllib.parse import quote
+    from fastapi import HTTPException
+    # Escape the user-supplied symbol so it cannot alter the upstream URL.
+    url = 'https://dps.psx.com.pk/company/' + quote(symbol, safe='')
+    try:
+        # requests applies no timeout by default; bound the call so a stalled
+        # upstream cannot block this worker indefinitely.
+        r = requests.get(url, timeout=15)
+        r.raise_for_status()
+    except requests.RequestException as exc:
+        raise HTTPException(status_code=502, detail=f'Could not fetch PSX data for {symbol}') from exc
+    scraper = PsxScraper(html_content=r.text)
+    company_data = scraper.scrape_all_data()
+    return company_data

models.py CHANGED Viewed

@@ -1,5 +1,6 @@
-from typing import Dict, List
-from pydantic import BaseModel
 class PsxStock(BaseModel):
@@ -16,4 +17,97 @@ class PsxStock(BaseModel):
 class PsxMarketResponse(BaseModel):
-    sectors: Dict[str, List[PsxStock]]

+from typing import Dict, List, Optional
+from pydantic import BaseModel, Field, validator
+from datetime import datetime
 class PsxStock(BaseModel):
 class PsxMarketResponse(BaseModel):
+    sectors: Dict[str, List[PsxStock]]
+class CircuitBreaker(BaseModel):
+    """Lower limit, upper limit and current price, all required floats.
+
+    Each field declares a camelCase alias (``lowerLimit``, ``upperLimit``,
+    ``currentPrice``).
+    """
+    lower_limit: float = Field(..., alias="lowerLimit")
+    upper_limit: float = Field(..., alias="upperLimit")
+    current_price: float = Field(..., alias="currentPrice")
+class DayRange(BaseModel):
+    """Required ``low`` / ``high`` / ``current`` floats for a day range.
+
+    Has the same three fields as ``YearRange``.
+    """
+    low: float
+    high: float
+    current: float
+class YearRange(BaseModel):
+    """Required ``low`` / ``high`` / ``current`` floats for a year range.
+
+    Has the same three fields as ``DayRange``.
+    """
+    low: float
+    high: float
+    current: float
+class TradingStats(BaseModel):
+    """Optional trading figures; every field defaults to ``None``.
+
+    Prices and ratios are floats, volumes are ints. Fields whose Python name
+    differs from the input key declare an alias (e.g. ``open_price`` is
+    aliased to ``open``, ``ask_price`` to ``askPrice``).
+    """
+    open_price: Optional[float] = Field(None, alias="open")
+    high_price: Optional[float] = Field(None, alias="high")
+    low_price: Optional[float] = Field(None, alias="low")
+    close_price: Optional[float] = Field(None, alias="close")
+    volume: Optional[int] = None
+    ask_price: Optional[float] = Field(None, alias="askPrice")
+    ask_volume: Optional[int] = Field(None, alias="askVolume")
+    bid_price: Optional[float] = Field(None, alias="bidPrice")
+    bid_volume: Optional[int] = Field(None, alias="bidVolume")
+    # NOTE(review): presumably "last day closing price" - confirm.
+    ldcp: Optional[float] = None
+    var: Optional[float] = None
+    haircut: Optional[float] = None
+    pe_ratio: Optional[float] = Field(None, alias="peRatio")
+class QuoteData(BaseModel):
+    """Quote for one symbol: identity, price change and nested range/stat models.
+
+    All fields are required except ``one_year_change`` and ``ytd_change``,
+    which default to ``None``.
+    """
+    company_name: str = Field(..., alias="companyName")
+    symbol: str
+    sector: str
+    current_price: float = Field(..., alias="currentPrice")
+    change: float
+    change_percent: float = Field(..., alias="changePercent")
+    # NOTE(review): unlike the other nested models below, this field has no
+    # camelCase alias - confirm whether "circuitBreaker" was intended.
+    circuit_breaker: CircuitBreaker
+    day_range: DayRange = Field(..., alias="dayRange")
+    year_range: YearRange = Field(..., alias="yearRange")
+    trading_stats: TradingStats = Field(..., alias="tradingStats")
+    one_year_change: Optional[float] = Field(None, alias="oneYearChange")
+    ytd_change: Optional[float] = Field(None, alias="ytdChange")
+class FinancialResult(BaseModel):
+    """One announcement row: date and title strings plus two optional links.
+
+    ``date`` is kept as a plain string. ``document_link`` and ``pdf_link``
+    are aliased to ``documentLink`` / ``pdfLink`` and default to ``None``.
+    """
+    date: str
+    title: str
+    document_link: Optional[str] = Field(None, alias="documentLink")
+    pdf_link: Optional[str] = Field(None, alias="pdfLink")
+class FinancialEntry(BaseModel):
+    """Financial figures for one reporting period.
+
+    ``period`` is required; ``sales``, ``profit_after_tax`` (alias
+    ``profitAfterTax``) and ``eps`` are optional floats defaulting to ``None``.
+    """
+    period: str
+    sales: Optional[float] = None
+    profit_after_tax: Optional[float] = Field(None, alias="profitAfterTax")
+    eps: Optional[float] = None
+class Financials(BaseModel):
+    """Two required lists of ``FinancialEntry``: ``annual`` and ``quarterly``."""
+    annual: List[FinancialEntry]
+    quarterly: List[FinancialEntry]
+class RatioEntry(BaseModel):
+    """Ratio values for one reporting period.
+
+    ``period`` is required; the four ratio fields are optional floats that
+    default to ``None``. Multi-word fields declare camelCase aliases.
+    """
+    period: str
+    gross_profit_margin: Optional[float] = Field(None, alias="grossProfitMargin")
+    net_profit_margin: Optional[float] = Field(None, alias="netProfitMargin")
+    eps_growth: Optional[float] = Field(None, alias="epsGrowth")
+    peg: Optional[float] = None
+class CompanyProfile(BaseModel):
+    """Descriptive company details; every field is required.
+
+    ``key_people`` is a list of string-to-string dicts.
+    """
+    business_description: str = Field(..., alias="businessDescription")
+    # NOTE(review): the dict keys are not fixed by this model - confirm the
+    # expected shape (e.g. name/role) against whatever populates it.
+    key_people: List[Dict[str, str]] = Field(..., alias="keyPeople")
+    address: str
+    website: str
+    registrar: str
+    auditor: str
+    fiscal_year_end: str = Field(..., alias="fiscalYearEnd")
+class EquityProfile(BaseModel):
+    """Required equity figures: market cap, share count and free float.
+
+    ``market_cap`` and ``free_float_percent`` are floats; ``shares`` and
+    ``free_float_units`` are ints.
+    """
+    market_cap: float = Field(..., alias="marketCap")
+    shares: int
+    free_float_units: int = Field(..., alias="freeFloatUnits")
+    free_float_percent: float = Field(..., alias="freeFloatPercent")
+class CompanyData(BaseModel):
+    """Top-level result: announcements, financials, ratios and a timestamp.
+
+    ``timestamp`` defaults to ``datetime.now()`` evaluated when the model is
+    created (a naive datetime with no timezone attached).
+    """
+    # The quote / profile / equity sections are commented out, so they are
+    # not part of this model's schema.
+    # quote: QuoteData
+    # profile: CompanyProfile
+    # equity: EquityProfile
+    announcements: List[FinancialResult]
+    financials: Financials
+    ratios: List[RatioEntry]
+    timestamp: datetime = Field(default_factory=datetime.now)

psx_scraper.py ADDED Viewed

	@@ -0,0 +1,237 @@

+from bs4 import BeautifulSoup
+import re
+from models import FinancialResult,FinancialEntry,Financials,RatioEntry,CompanyData
+from typing import List, Optional, Dict, Any
+class PsxScraper(object):
+    """Extract announcements, financials and ratios from a PSX company page.
+
+    The HTML is parsed once in ``__init__``. Each ``extract_*`` method reads
+    from that parsed tree and returns an empty result when the section,
+    table, header or body it needs is missing.
+    """
+    def __init__(self, html_content:str):
+        """Parse ``html_content`` with BeautifulSoup's ``html.parser``."""
+        self.soup = BeautifulSoup(html_content, 'html.parser')
+    def _clean_number(self, text: str) -> float:
+        """Convert a displayed number such as ``"Rs. 1,234.50"`` to a float.
+
+        Commas, spaces and the ``Rs.`` prefix are removed and the first
+        numeric token is parsed. A value wrapped in parentheses, such as
+        ``"(12.50)"`` or ``"(12.50%)"``, is treated as negative. Empty or
+        non-numeric text yields ``0.0``.
+        """
+        if not text:
+            return 0.0
+        cleaned = str(text).replace(',', '').replace(' ', '').replace('Rs.', '')
+        match = re.search(r'[-+]?\d*\.?\d+', cleaned)
+        if not match:
+            return 0.0
+        value = float(match.group())
+        # Accounting notation: parentheses mark a negative figure. Only flip
+        # positive values so "(-5)" stays -5 and "(0)" does not become -0.0.
+        wrapped = cleaned.startswith('(') and cleaned.rstrip('%').endswith(')')
+        if wrapped and value > 0:
+            return -value
+        return value
+    def _extract_range(self, range_text: str) -> Dict[str, float]:
+        """Split a ``"low — high"`` string (em dash) into a low/high/current dict.
+
+        ``current`` is always ``0.0`` here; text that does not split into
+        exactly two parts yields all zeros.
+        """
+        # Example: "296.08 — 361.88"
+        parts = range_text.split('—')
+        if len(parts) != 2:
+            return {'low': 0.0, 'high': 0.0, 'current': 0.0}
+        return {
+            'low': self._clean_number(parts[0]),
+            'high': self._clean_number(parts[1]),
+            'current': 0.0,  # Not available from the range text itself
+        }
+    def _read_table(self, table) -> tuple:
+        """Return ``(headers, rows)`` for a table with ``thead`` and ``tbody``.
+
+        ``headers`` is the stripped text of each ``th`` in the first header
+        row. ``rows`` is a list of stripped cell-text lists, one per body row
+        whose cell count equals the header count. A table without a header
+        row yields ``([], [])``; one without a body yields no rows.
+        """
+        thead = table.find('thead')
+        header_row = thead.find('tr') if thead else None
+        if header_row is None:
+            return [], []
+        headers = [th.text.strip() for th in header_row.find_all('th')]
+        tbody = table.find('tbody')
+        if tbody is None:
+            return headers, []
+        rows = []
+        for tr in tbody.find_all('tr'):
+            cells = [td.text.strip() for td in tr.find_all('td')]
+            # Skip rows that do not line up with the header columns.
+            if len(cells) == len(headers):
+                rows.append(cells)
+        return headers, rows
+    def _financial_entries(self, panel) -> List[FinancialEntry]:
+        """Build one ``FinancialEntry`` per period column of ``panel``'s table.
+
+        The first column holds metric names; every other column is a period.
+        Returns ``[]`` when the panel, its table or its data rows are missing.
+        """
+        if not panel:
+            return []
+        table = panel.find('table')
+        if not table:
+            return []
+        headers, rows = self._read_table(table)
+        if not headers or not rows:
+            return []
+        entries = []
+        # Read cells by column position so duplicate period labels cannot
+        # overwrite each other.
+        for col in range(1, len(headers)):
+            entry = FinancialEntry(period=headers[col])
+            for cells in rows:
+                metric = cells[0]
+                value = cells[col]
+                if 'Sales' in metric:
+                    entry.sales = self._clean_number(value)
+                elif 'Profit after Taxation' in metric:
+                    entry.profit_after_tax = self._clean_number(value)
+                elif 'EPS' in metric:
+                    entry.eps = self._clean_number(value)
+            entries.append(entry)
+        return entries
+    def extract_announcements(self) -> List[FinancialResult]:
+        """Extract the rows of the "Financial Results" tab.
+
+        Each body row with at least three cells becomes a ``FinancialResult``:
+        cell 0 is the date, cell 1 the title, and the links in cell 2 are
+        sorted into ``documentLink`` (href contains ``javascript:``) or
+        ``pdfLink`` (href contains ``.pdf``).
+        """
+        announcements = []
+        financial_results_tab = self.soup.find('div', class_='tabs__panel', attrs={'data-name': 'Financial Results'})
+        if not financial_results_tab:
+            return announcements
+        table = financial_results_tab.find('table')
+        if not table:
+            return announcements
+        for row in table.find_all('tr')[1:]:  # Skip header row
+            cols = row.find_all('td')
+            if len(cols) < 3:
+                continue
+            document_link = None
+            pdf_link = None
+            for link in cols[2].find_all('a'):
+                href = link.get('href', '')
+                if 'javascript:' in href:
+                    document_link = href
+                elif '.pdf' in href:
+                    pdf_link = href
+            announcements.append(FinancialResult(
+                date=cols[0].text.strip(),
+                title=cols[1].text.strip(),
+                documentLink=document_link,
+                pdfLink=pdf_link
+            ))
+        return announcements
+    def extract_financials(self) -> Financials:
+        """Extract the "Annual" and "Quarterly" panels of the financials section.
+
+        Returns a ``Financials`` with empty lists when the ``#financials``
+        section is absent; a missing panel gives an empty list for that part.
+        """
+        financials_section = self.soup.find('div', id='financials')
+        if not financials_section:
+            return Financials(annual=[], quarterly=[])
+        annual_tab = financials_section.find('div', class_='tabs__panel', attrs={'data-name': 'Annual'})
+        quarterly_tab = financials_section.find('div', class_='tabs__panel', attrs={'data-name': 'Quarterly'})
+        return Financials(
+            annual=self._financial_entries(annual_tab),
+            quarterly=self._financial_entries(quarterly_tab)
+        )
+    def extract_ratios(self) -> List[RatioEntry]:
+        """Extract one ``RatioEntry`` per period column of the ratios table.
+
+        Parenthesised values are passed to ``_clean_number`` unchanged so
+        they come back negative.
+        """
+        ratios = []
+        ratios_section = self.soup.find('div', id='ratios')
+        if not ratios_section:
+            return ratios
+        table = ratios_section.find('table')
+        if not table:
+            return ratios
+        headers, rows = self._read_table(table)
+        if not headers or not rows:
+            return ratios
+        for col in range(1, len(headers)):  # Skip first header (ratio names)
+            entry = RatioEntry(period=headers[col])
+            for cells in rows:
+                ratio_name = cells[0]
+                value = cells[col]
+                if 'Gross Profit Margin' in ratio_name:
+                    entry.gross_profit_margin = self._clean_number(value)
+                elif 'Net Profit Margin' in ratio_name:
+                    entry.net_profit_margin = self._clean_number(value)
+                elif 'EPS Growth' in ratio_name:
+                    entry.eps_growth = self._clean_number(value)
+                elif 'PEG' in ratio_name:
+                    entry.peg = self._clean_number(value)
+            ratios.append(entry)
+        return ratios
+    def scrape_all_data(self) -> CompanyData:
+        """Run every extractor and bundle the results into a ``CompanyData``."""
+        return CompanyData(
+            announcements=self.extract_announcements(),
+            financials=self.extract_financials(),
+            ratios=self.extract_ratios()
+        )