sarim committed on
Commit
1dd03e1
·
1 Parent(s): ec315ba
Files changed (1) hide show
  1. app.py +42 -56
app.py CHANGED
@@ -1,9 +1,4 @@
1
  from fastapi import FastAPI
2
- from selenium import webdriver
3
- from selenium.webdriver.chrome.options import Options
4
- from selenium.webdriver.common.by import By
5
- from selenium.webdriver.support.ui import WebDriverWait
6
- from selenium.webdriver.support import expected_conditions as EC
7
  from bs4 import BeautifulSoup
8
  import time
9
  import requests
@@ -14,61 +9,52 @@ app = FastAPI(
14
  description="Scrape data from PSX website"
15
  )
16
 
17
-
18
- def selenium_dividend_extraction():
19
- # Set up Chrome options
20
- chrome_options = Options()
21
- chrome_options.add_argument("--headless") # Run in background
22
- chrome_options.add_argument("--no-sandbox")
23
- chrome_options.add_argument("--disable-dev-shm-usage")
24
 
25
- # Initialize the driver
26
- driver = webdriver.Chrome(options=chrome_options)
27
 
28
- try:
29
- driver.get('https://scstrade.com/MarketStatistics/MS_xDates.aspx')
30
- print("Page loaded")
31
-
32
- # Wait for the table to load (adjust timeout as needed)
33
- wait = WebDriverWait(driver, 10)
34
- wait.until(EC.presence_of_element_located((By.ID, "list")))
35
-
36
- # Get the page source after JavaScript execution
37
- page_source = driver.page_source
38
- soup = BeautifulSoup(page_source, 'html.parser')
39
 
40
- table = soup.find('table', id='list')
41
- print(table)
42
- results = []
43
 
44
- if table:
45
- print("Table found with Selenium")
46
- # Use CSS selector to get data rows
47
- rows = table.select('tr.jqgrow')
48
- print(f"Number of data rows found: {len(rows)}")
 
 
 
49
 
50
- for row in rows:
51
- cells = row.find_all('td')
52
- if len(cells) >= 6:
53
- company_code = cells[0].get_text(strip=True)
54
- company_name = cells[1].get_text(strip=True)
55
- dividend_amount = cells[2].get_text(strip=True) or "No dividend"
56
- expiry_date = cells[5].get_text(strip=True) or "No date"
57
-
58
- results.append({
59
- 'Company_Code': company_code,
60
- 'Company_Name': company_name,
61
- 'Dividend': dividend_amount,
62
- 'Expiry_Date': expiry_date
63
- })
64
-
65
- return results
66
-
67
- except Exception as e:
68
- print(f"Error: {e}")
69
- return []
70
- finally:
71
- driver.quit()
72
 
73
  # Usage example for the simple version
74
  # dividend_info = simple_dividend_extraction('text.html')
@@ -82,7 +68,7 @@ def greet_json():
82
 
83
@app.get("/dividend_history")
def get_dividend():
    """API endpoint: return scraped dividend data (Selenium-based scraper)."""
    return selenium_dividend_extraction()
86
 
87
 
88
 
 
1
  from fastapi import FastAPI
 
 
 
 
 
2
  from bs4 import BeautifulSoup
3
  import time
4
  import requests
 
9
  description="Scrape data from PSX website"
10
  )
11
 
12
def simple_dividend_extraction():
    """Fetch the PSX financial-announcements page and extract dividend rows.

    Downloads the page with a plain HTTP GET (no JavaScript execution) and
    parses the first HTML table found.

    Returns:
        list[dict]: one dict per data row with keys 'Company', 'Dividend',
        'Date'; an empty list when no table is present.
    """
    # A timeout is essential: without it a slow/unreachable PSX server
    # would hang this request (and the API endpoint) indefinitely.
    r = requests.get(
        'https://www.psx.com.pk/psx/announcement/financial-announcements',
        timeout=30,
    )

    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.find('table')

    results = []
    if table:
        # Skip the header row; every remaining <tr> is a candidate data row.
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            # Rows with fewer than 6 cells (separators, notices) are skipped.
            if len(cells) >= 6:
                results.append({
                    'Company': cells[1].get_text(strip=True),
                    'Dividend': cells[2].get_text(strip=True) or "No dividend",
                    'Date': cells[5].get_text(strip=True) or "No date",
                })
    return results
 
 
 
 
 
 
 
 
58
 
59
  # Usage example for the simple version
60
  # dividend_info = simple_dividend_extraction('text.html')
 
68
 
69
  @app.get("/dividend_history")
70
  def get_dividend():
71
+ return simple_dividend_extraction()
72
 
73
 
74