Spaces:

CommunityOne
/

open-navigator

Running on CPU Upgrade

File size: 4,720 Bytes

61d29fc

#!/usr/bin/env python3
"""
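Debug script that loads a Simbli eBoard meeting-listing page in headless
Chromium, saves the rendered HTML, and prints a summary of its links and
page structure.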
"""
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth import Stealth
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re


def _categorize_links(anchors):
    """Bucket anchor tags by the kind of target their ``href`` points at.

    Each link is placed in the first bucket whose test matches, checked in
    this order: contains ``MID=`` (case-insensitive), contains
    ``meetingdetail`` (case-insensitive), ends with ``.pdf``
    (case-insensitive), otherwise any non-empty href that is neither a
    fragment (``#...``) nor a ``javascript:`` pseudo-link.

    Args:
        anchors: Iterable of parsed ``<a>`` tags exposing ``get`` and
            ``get_text``.

    Returns:
        Tuple ``(mid_links, meetingdetail_links, pdf_links, other_links)``,
        each a list of ``(href, text)`` pairs. Text in ``other_links`` is
        truncated to 50 characters.
    """
    mid_links = []
    meetingdetail_links = []
    pdf_links = []
    other_links = []

    for anchor in anchors:
        href = anchor.get('href', '')
        text = anchor.get_text().strip()
        href_lower = href.lower()

        if 'MID=' in href.upper():
            mid_links.append((href, text))
        elif 'meetingdetail' in href_lower:
            meetingdetail_links.append((href, text))
        elif href_lower.endswith('.pdf'):
            pdf_links.append((href, text))
        elif href and not href.startswith(('#', 'javascript:')):
            other_links.append((href, text[:50]))

    return mid_links, meetingdetail_links, pdf_links, other_links


def _print_link_group(title, links, limit):
    """Print a count header for ``links`` followed by up to ``limit`` entries."""
    print(f"{title}: {len(links)}")
    for href, text in links[:limit]:
        print(f"  - {text[:60]}: {href[:80]}")


def _report_page_structure(soup):
    """Print ViewState, table, script and meeting-list findings for ``soup``."""
    print("\n" + "="*80)
    print("Page Analysis:")
    print("="*80)

    # A __VIEWSTATE input is emitted by ASP.NET WebForms pages.
    viewstate = soup.find('input', {'id': '__VIEWSTATE'})
    if viewstate:
        print(f"✓ ASP.NET ViewState present ({len(viewstate.get('value', ''))} chars)")

    # Tables or grids might contain the meeting rows; show the first five.
    tables = soup.find_all('table')
    print(f"Tables found: {len(tables)}")
    for number, table in enumerate(tables[:5], start=1):
        rows = table.find_all('tr')
        print(f"  Table {number}: {len(rows)} rows")
        if rows:
            first_row_text = rows[0].get_text().strip()[:100]
            print(f"    First row: {first_row_text}")

    scripts = soup.find_all('script')
    print(f"\nJavaScript blocks: {len(scripts)}")

    # Any element whose id contains "meeting" followed later by "list".
    meeting_list_elem = soup.find(id=re.compile(r'meeting.*list', re.I))
    if meeting_list_elem:
        print(f"✓ Found element with 'meeting' and 'list' in ID: {meeting_list_elem.get('id')}")


async def main():
    """Load the eBoard meeting listing and print a structural summary.

    Launches headless Chromium with stealth applied, navigates to the
    hard-coded listing URL, saves the rendered HTML to
    ``/tmp/eboard_page.html``, then prints categorised links and basic
    page-structure findings. The browser is closed even if a step fails.
    """
    url = "https://simbli.eboardsolutions.com/SB_Meetings/SB_MeetingListing.aspx?S=2088"

    print(f"Loading: {url}\n")

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--disable-dev-shm-usage',
                '--no-sandbox',
            ],
        )

        # Ensure the browser process is released on any failure below.
        try:
            user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'

            context = await browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent=user_agent,
                locale='en-US',
                timezone_id='America/Chicago',
            )

            page = await context.new_page()

            # Apply stealth
            stealth = Stealth()
            await stealth.apply_stealth_async(page)

            # Navigate; goto() may return None, so guard before reading status.
            response = await page.goto(url, wait_until='networkidle', timeout=60000)
            if response is None:
                print("Response status: (no response returned)")
            else:
                print(f"Response status: {response.status}")

            # Give client-side JavaScript extra time after network idle.
            await page.wait_for_timeout(5000)

            content = await page.content()
            # Report the encoded size so the "bytes" label is accurate.
            print(f"Page size: {len(content.encode('utf-8'))} bytes\n")

            # Save full HTML for inspection; explicit UTF-8 avoids
            # locale-dependent encode errors on non-ASCII page text.
            with open('/tmp/eboard_page.html', 'w', encoding='utf-8') as f:
                f.write(content)
            print("Saved full HTML to /tmp/eboard_page.html\n")

            soup = BeautifulSoup(content, 'html.parser')

            all_links = soup.find_all('a', href=True)
            print(f"Total links found: {len(all_links)}\n")

            mid_links, meetingdetail_links, pdf_links, other_links = (
                _categorize_links(all_links)
            )

            _print_link_group("Links with MID=", mid_links, 10)
            _print_link_group("\nLinks with 'meetingdetail'", meetingdetail_links, 10)
            _print_link_group("\nPDF links", pdf_links, 10)
            _print_link_group("\nOther significant links", other_links, 20)

            _report_page_structure(soup)
        finally:
            await browser.close()


# Entry point when executed as a script: run the async main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())