selenium
- app.py +58 -40
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,5 +1,11 @@
 from fastapi import FastAPI
-from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from bs4 import BeautifulSoup
+import time
 import requests
 
 app = FastAPI(
@@ -8,49 +14,61 @@ app = FastAPI(
     description="Scrape data from PSX website"
 )
 
-    soup = BeautifulSoup(r.text, 'html.parser')
-    table = soup.find('table', class_ = 'caption-bottom w-full min-w-full text-xs sm:text-sm')
-    print(table)
-        print(f"
-        print(f"Row contents: {row}")
-        print("in row")
-        cells = row.find_all('td')
+
+def selenium_dividend_extraction():
+    # Set up Chrome options
+    chrome_options = Options()
+    chrome_options.add_argument("--headless")  # Run in background
+    chrome_options.add_argument("--no-sandbox")
+    chrome_options.add_argument("--disable-dev-shm-usage")
+
+    # Initialize the driver
+    driver = webdriver.Chrome(options=chrome_options)
+
+    try:
+        driver.get('https://scstrade.com/MarketStatistics/MS_xDates.aspx')
+        print("Page loaded")
+
+        # Wait for the table to load (adjust timeout as needed)
+        wait = WebDriverWait(driver, 10)
+        wait.until(EC.presence_of_element_located((By.ID, "list")))
+
+        # Get the page source after JavaScript execution
+        page_source = driver.page_source
+        soup = BeautifulSoup(page_source, 'html.parser')
+
+        table = soup.find('table', id='list')
+        print(table)
+        results = []
+
+        if table:
+            print("Table found with Selenium")
+            # Use CSS selector to get data rows
+            rows = table.select('tr.jqgrow')
+            print(f"Number of data rows found: {len(rows)}")
+
+            for row in rows:
+                cells = row.find_all('td')
+                if len(cells) >= 6:
+                    company_code = cells[0].get_text(strip=True)
+                    company_name = cells[1].get_text(strip=True)
+                    dividend_amount = cells[2].get_text(strip=True) or "No dividend"
+                    expiry_date = cells[5].get_text(strip=True) or "No date"
+
+                    results.append({
+                        'Company_Code': company_code,
+                        'Company_Name': company_name,
+                        'Dividend': dividend_amount,
+                        'Expiry_Date': expiry_date
+                    })
+
+        return results
+
+    except Exception as e:
+        print(f"Error: {e}")
+        return []
+    finally:
+        driver.quit()
 
 # Usage example for the simple version
 # dividend_info = simple_dividend_extraction('text.html')
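
Note: the usage comment kept above refers to a file-based helper that is not part of this diff. A minimal sketch of what such a helper could look like, assuming it mirrors the table id, row selector, and column layout of the Selenium path above (only the name and the 'text.html' argument come from the comment; the body is an assumption):

def simple_dividend_extraction(path):
    # Hypothetical offline variant: parse a saved copy of the page
    # (e.g. text.html) with BeautifulSoup alone, no browser required.
    from bs4 import BeautifulSoup
    with open(path, encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")
    results = []
    table = soup.find("table", id="list")  # same table id as the live page
    if table:
        for row in table.select("tr.jqgrow"):  # same data-row selector
            cells = row.find_all("td")
            if len(cells) >= 6:
                results.append({
                    "Company_Code": cells[0].get_text(strip=True),
                    "Company_Name": cells[1].get_text(strip=True),
                    "Dividend": cells[2].get_text(strip=True) or "No dividend",
                    "Expiry_Date": cells[5].get_text(strip=True) or "No date",
                })
    return results

A helper like this makes the parsing logic testable against saved HTML without launching Chrome.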
@@ -64,7 +82,7 @@ def greet_json():
 
 @app.get("/dividend_history")
 def get_dividend():
-    return
+    return selenium_dividend_extraction()
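
With this change, GET /dividend_history triggers a live scrape. A quick local smoke test, assuming the module is app.py and the FastAPI instance is named app as in this diff (host and port are illustrative):

# Start the server first, e.g.:  uvicorn app:app --port 8000
import requests

resp = requests.get("http://localhost:8000/dividend_history", timeout=120)
resp.raise_for_status()
rows = resp.json()
print(f"Fetched {len(rows)} rows")
if rows:
    # Each entry has Company_Code, Company_Name, Dividend, Expiry_Date
    print(rows[0])

The generous timeout is deliberate: the handler launches a headless Chrome per request, so responses can take several seconds.
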
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 fastapi
 uvicorn
 requests
-beautifulsoup4
+beautifulsoup4
+selenium
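
Caveat: the selenium package only installs the Python bindings; webdriver.Chrome still needs a Chrome/Chromium binary and a matching chromedriver on the host. On a Debian-based image (for example, a Space that installs apt dependencies from a packages.txt), one possible setup, where the package names and paths are assumptions for such an image:

packages.txt:

chromium
chromium-driver

and the corresponding driver wiring in Python:

# Sketch: point Selenium at an apt-installed Chromium.
# /usr/bin/chromium and /usr/bin/chromedriver are assumed paths for the
# Debian chromium / chromium-driver packages.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.binary_location = "/usr/bin/chromium"  # assumed path

driver = webdriver.Chrome(
    service=Service("/usr/bin/chromedriver"),  # assumed path
    options=chrome_options,
)

Without a browser binary in the image, the new endpoint will fail at webdriver.Chrome().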