# Source: Hugging Face Space babaTEEpe/porty (status: Sleeping) - File size: 22,280 bytes
"""
CLOUD MONITOR — Lightweight monitor for Hugging Face Spaces
Uses Playwright (headless Chromium) instead of Selenium+Firefox.
Replicates the exact navigation flow of the local monitor.py.
"""
import os
import csv
import time
import asyncio
from datetime import datetime

try:
    from playwright.async_api import async_playwright
except ImportError:
    print("Installing playwright...")
    os.system("pip install playwright && playwright install chromium")
    from playwright.async_api import async_playwright

from flask import Flask, send_file
import threading
import asyncio
from huggingface_hub import HfApi, hf_hub_download

# --- Cloud Storage Settings ---
# Access token read from the HF_TOKEN environment variable.
# The cloud sync/pull helpers return immediately when it is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repo, read from the HF_DATASET environment variable.
# Format: "your-username/your-dataset-name" (e.g., "Teatop/sporty-data")
# The cloud sync/pull helpers return immediately when it is unset.
DATASET_ID = os.getenv("HF_DATASET") 

def sync_to_cloud():
    """Upload the local result files to the Hugging Face dataset repo.

    Does nothing when HF_TOKEN or DATASET_ID is not configured. Each of
    CSV_FILE and RESULTS_FILE (the txt file is kept for legacy support) is
    uploaded under its own name, but only if it already exists locally, so a
    sync that runs before the first result was saved is a quiet no-op
    instead of an upload error.

    This is best-effort: any error raised while uploading is printed and
    swallowed so that a failed sync never stops the caller.
    """
    if not HF_TOKEN or not DATASET_ID:
        return
    try:
        api = HfApi(token=HF_TOKEN)
        uploaded = 0
        for local_file in (CSV_FILE, RESULTS_FILE):
            # Nothing has been written to this file yet -> nothing to upload.
            if not os.path.exists(local_file):
                continue
            api.upload_file(
                path_or_fileobj=local_file,
                path_in_repo=local_file,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
            uploaded += 1
        if uploaded:
            print("  [CLOUD] Sync complete! Data is safe in your Dataset.")
        else:
            print("  [CLOUD] Nothing to sync yet.")
    except Exception as e:
        print(f"  [CLOUD] Sync error: {str(e)}")

def pull_from_cloud():
    """Download the previously synced result files from the dataset repo.

    Does nothing when HF_TOKEN or DATASET_ID is not configured. CSV_FILE and
    RESULTS_FILE are fetched independently into the current directory, so a
    file that is missing in the repo does not prevent the other one from
    being restored. The txt file is requested even when no local copy
    exists, which is the normal situation on a freshly started container.

    This is best-effort: a failed download is printed and skipped, and the
    caller simply starts with whatever history could be restored.
    """
    if not HF_TOKEN or not DATASET_ID:
        return

    print(f"[Monitor] Pulling history from {DATASET_ID}...")
    restored = []
    for filename in (CSV_FILE, RESULTS_FILE):
        try:
            # NOTE(review): local_dir_use_symlinks appears to be deprecated in
            # recent huggingface_hub releases - confirm against the pinned version.
            hf_hub_download(
                repo_id=DATASET_ID,
                filename=filename,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
                local_dir_use_symlinks=False
            )
            restored.append(filename)
        except Exception as e:
            # Show the real reason (missing file, bad token, network, ...)
            # instead of reporting every failure as "no history".
            print(f"[Monitor] Could not restore {filename}: {e}")

    if restored:
        print("[Monitor] History successfully restored from cloud!")
    else:
        print("[Monitor] No cloud history found, starting fresh.")

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

@app.route('/')
def index():
    """Return the static landing page.

    The page shows a status line and links to the /download and /debug
    routes.
    """
    landing_page = '''
    <html>
        <head><title>Professor's Cloud Monitor</title></head>
        <body style="font-family: sans-serif; text-align: center; padding: 50px; background: #121212; color: white;">
            <h1>🧠 Virtual Football Cloud Monitor 👽</h1>
            <p>Status: Running 24/7</p>
            <br>
            <a href="/download" style="background: #00ff00; color: black; padding: 15px 30px; text-decoration: none; border-radius: 5px; font-weight: bold;">
                DOWNLOAD MATCH_HISTORY.CSV
            </a>
            <p style="margin-top: 50px; color: #888;">Note: Download your data daily. Free Spaces reset periodically!</p>
            <br>
            <a href="/debug" style="background: #ff6600; color: white; padding: 10px 20px; text-decoration: none; border-radius: 5px;">
                VIEW DEBUG SCREENSHOT
            </a>
        </body>
    </html>
    '''
    return landing_page

@app.route('/download')
def download():
    """Send the match history CSV as a file attachment.

    Returns the plain-text message "No data collected yet!" when the CSV
    has not been created yet.
    """
    # Use the shared CSV_FILE constant (the file save_result() writes) and
    # resolve it to an absolute path: the existence check is relative to the
    # working directory, while send_file may resolve a relative path against
    # the Flask application root, so both must look at the same file.
    csv_path = os.path.abspath(CSV_FILE)
    if os.path.isfile(csv_path):
        return send_file(csv_path, as_attachment=True)
    return "No data collected yet!"

@app.route('/debug')
def debug():
    """Send the most recently written debug screenshot as a PNG image.

    Debug screenshots are the ``debug_*.png`` files in the working
    directory. Returns a plain-text hint when none exists yet.
    """
    screenshots = [
        name for name in os.listdir('.')
        if name.startswith('debug_') and name.endswith('.png')
    ]
    if not screenshots:
        return "No debug screenshots yet. Wait for cycle 1."

    # Pick by modification time, not by name: lexically "..._9.png" sorts
    # after "..._10.png" and "debug_livebtn_*" after "debug_cycle*", so the
    # last name in sorted order is not the newest file.
    newest = max(screenshots, key=os.path.getmtime)
    # Absolute path so send_file reads the same file that was just listed,
    # regardless of how it resolves relative paths.
    return send_file(os.path.abspath(newest), mimetype='image/png')


# Plain-text log: one "<home> <home_score>:<away_score> <away>" line per saved match.
RESULTS_FILE = "england_virtual_results.txt"
# CSV history: timestamp, teams, scores and 1X2 odds for every saved match.
CSV_FILE = "match_history.csv"
CHECK_INTERVAL = 15  # Seconds between cycles; check every 15s to catch the 84-90min window
# Page that the monitor loads at the start of every cycle.
URL = "https://www.sportybet.com/ng/sport/vFootball/sv:category:202120002/sv:league:2"


def save_result(home_team, away_team, home_score, away_score, h_odds='', d_odds='', a_odds=''):
    """Append one match result to the txt log and to the CSV history.

    The txt file (RESULTS_FILE) receives a "<home> <h>:<a> <away>" line.
    The CSV file (CSV_FILE) receives a row stamped with the current local
    time; the column header is written first when the CSV does not exist
    yet. A confirmation line is printed afterwards.
    """
    scoreline = f"{home_team} {home_score}:{away_score} {away_team}"

    with open(RESULTS_FILE, 'a', encoding='utf-8') as txt_out:
        txt_out.write(scoreline + "\n")

    # Must be decided before opening: append mode creates a missing file.
    needs_header = not os.path.exists(CSV_FILE)
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as csv_out:
        out = csv.writer(csv_out)
        if needs_header:
            columns = ['timestamp', 'home_team', 'away_team', 'home_score', 'away_score',
                       'h_odds', 'd_odds', 'a_odds']
            out.writerow(columns)
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        record = [stamp, home_team, away_team, home_score, away_score,
                  h_odds, d_odds, a_odds]
        out.writerow(record)

    print(f"  [SAVED] {scoreline} | Odds: H={h_odds} D={d_odds} A={a_odds}")


# A fixture saved less than this many seconds ago is never saved again, even
# when the wall-clock hour (and with it the hourly match ID) has rolled over
# in between. The hourly ID already blocks a second save of the same fixture
# within one clock hour, so a short window across the hour boundary only
# closes the rollover gap.
_RESAVE_GRACE_SECONDS = 300

# Method 4 only accepts elements whose whole text is this short, so that a
# page-level ancestor (<html>, <body>, wrappers) that merely *contains* the
# toggle is not mistaken for the toggle itself.
_MAX_TOGGLE_TEXT_LEN = 40

# Selector for one match row in the live table.
_MATCH_ROW_SELECTOR = "div.m-table-row.m-content-row.match-row.vFootball-row"


def _load_saved_matches():
    """Rebuild the de-duplication state from CSV_FILE.

    Returns a ``(saved_matches, last_saved)`` pair: ``saved_matches`` is the
    set of "<home>_<away>_<YYYYMMDDHH>" IDs, ``last_saved`` maps
    "<home>_<away>" to the newest timestamp stored for that fixture.
    Rows that cannot be parsed are skipped.
    """
    saved_matches = set()
    last_saved = {}
    if not os.path.exists(CSV_FILE):
        return saved_matches, last_saved

    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            try:
                saved_at = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
                fixture = f"{row['home_team']}_{row['away_team']}"
            except (KeyError, TypeError, ValueError):
                # Missing column, truncated row or unparsable timestamp.
                continue
            # Create ID from teams + hour of match
            saved_matches.add(f"{fixture}_{saved_at.strftime('%Y%m%d%H')}")
            if fixture not in last_saved or saved_at > last_saved[fixture]:
                last_saved[fixture] = saved_at
    print(f"[Monitor] {len(saved_matches)} unique hourly matches in history")
    return saved_matches, last_saved


def _already_saved(match_id, fixture, now, saved_matches, last_saved):
    """Return True if this fixture was already saved for the current round."""
    if match_id in saved_matches:
        return True
    previous = last_saved.get(fixture)
    if previous is None:
        return False
    return abs((now - previous).total_seconds()) < _RESAVE_GRACE_SECONDS


async def _click_text_leaf(page):
    """Method 1: click a div[role='text leaf'] whose text has 'Live Betting'."""
    # Check every candidate, not only the first one on the page.
    for el in await page.query_selector_all("div[role='text leaf']"):
        if 'Live Betting' in await el.inner_text():
            await el.evaluate("el => el.click()")
            print("  Clicked Live Betting (method 1: role text leaf)")
            return True
    return False


async def _click_selector(page, selector, label):
    """Methods 2/3: click the first element matching ``selector``, if any."""
    el = await page.query_selector(selector)
    if not el:
        return False
    await el.evaluate("el => el.click()")
    print(f"  Clicked Live Betting ({label})")
    return True


async def _click_numbered_toggle(page):
    """Method 4: click the short 'Live Betting <number>' element.

    The in-page toggle carries a number (e.g. "Live Betting 39") while the
    top nav entry just says "Live Betting".
    """
    for el in await page.query_selector_all("*:has-text('Live Betting')"):
        try:
            text = (await el.inner_text()).strip()
            if 'Live Betting' not in text or len(text) > _MAX_TOGGLE_TEXT_LEN:
                continue
            if not any(c.isdigit() for c in text):
                continue
            # className is not a plain string on every element type, so it
            # is normalised in the page before being lower-cased here.
            tag, css_class = await el.evaluate(
                "el => [el.tagName, typeof el.className === 'string' ? el.className : '']"
            )
            css_class = css_class.lower()
            if tag == 'NAV' or 'nav' in css_class or 'header' in css_class:
                continue
            await el.evaluate("el => el.click()")
        except Exception:
            # One unreadable/detached element must not end the whole search.
            continue
        print(f"  Clicked Live Betting (method 4: element with number '{text[:30]}')")
        return True
    return False


async def _click_via_js(page):
    """Method 5: walk the DOM in the page and click a matching element."""
    clicked = await page.evaluate("""() => {
        // Find all elements containing "Live Betting"
        const walker = document.createTreeWalker(
            document.body, NodeFilter.SHOW_ELEMENT
        );
        while (walker.nextNode()) {
            const el = walker.currentNode;
            const text = el.textContent || '';
            const cls = typeof el.className === 'string' ? el.className : '';
            // Target elements inside league-title or with role text
            if (text.includes('Live Betting') &&
                (cls.includes('league-title') || el.getAttribute('role') === 'text leaf' ||
                 cls.includes('league-tab'))) {
                el.click();
                return 'clicked: ' + cls;
            }
        }
        return null;
    }""")
    if not clicked:
        return False
    print(f"  Clicked Live Betting (method 5: JS walker - {clicked})")
    return True


async def _click_live_betting(page):
    """Click the in-page "Live Betting" toggle; return True on success.

    The page has TWO elements with "Live Betting" text: the top nav bar
    link (navigates AWAY - wrong) and the in-page toggle (shows live
    matches - correct). Five strategies are tried in order; a strategy that
    raises is reported and the next one is tried.
    """
    strategies = (
        lambda: _click_text_leaf(page),
        lambda: _click_selector(
            page, "div.league-title span[data-cms-key='live_betting']",
            "method 2: league-title cms-key"),
        lambda: _click_selector(
            page, "div.league-title span:has-text('Live Betting')",
            "method 3: league-title span text"),
        lambda: _click_numbered_toggle(page),
        lambda: _click_via_js(page),
    )
    for number, strategy in enumerate(strategies, start=1):
        try:
            if await strategy():
                return True
        except Exception as e:
            print(f"  Live Betting method {number} failed: {e}")
    return False


async def _wait_for_england(page, cycle, max_attempts=10, retry_delay=10):
    """Poll for the 'England Virtual' league header; return True if found.

    Makes up to ``max_attempts`` checks, ``retry_delay`` seconds apart. On
    the first failed check of cycles 1-5 a debug screenshot is written and
    the start of the page text is printed.
    """
    for attempt in range(max_attempts):
        try:
            header = await page.query_selector(
                "div.m-table-cell.league:has-text('England Virtual')"
            )
        except Exception as e:
            print(f"  Header lookup failed: {e}")
            header = None
        if header:
            print(f"  Found 'England Virtual' header! (attempt {attempt + 1})")
            return True

        # Debug screenshot on first attempt
        if attempt == 0 and cycle <= 5:
            await page.screenshot(path=f"debug_cycle{cycle}_attempt{attempt}.png")
            body = await page.inner_text("body")
            preview = body[:400].replace('\n', ' | ')
            print(f"  [DEBUG] Content: {preview}")

        print(f"  England Virtual not found yet (attempt {attempt + 1}/{max_attempts}), waiting {retry_delay}s...")
        await asyncio.sleep(retry_delay)
    return False


async def _find_england_rows(page):
    """Return ``(england_rows, total_row_count)`` for the live table.

    The page shows match rows of several leagues. Only the rows positioned
    vertically between the "England Virtual" header and the next league
    header are returned.
    """
    indices = await page.evaluate("""() => {
        // Find all league headers and match rows
        const headers = document.querySelectorAll('div.m-table-cell.league');
        const allRows = document.querySelectorAll('div.m-table-row.m-content-row.match-row.vFootball-row');

        // Find the England Virtual header position
        let englandHeader = null;
        let nextHeader = null;
        let foundEngland = false;

        for (const h of headers) {
            if (h.textContent.includes('England Virtual')) {
                englandHeader = h;
                foundEngland = true;
            } else if (foundEngland && !nextHeader) {
                nextHeader = h;
            }
        }

        if (!englandHeader) return [];

        // Get the vertical position of England header and next header
        const englandTop = englandHeader.getBoundingClientRect().top;
        const nextTop = nextHeader ? nextHeader.getBoundingClientRect().top : 999999;

        // Filter rows that are between England header and next header
        const indices = [];
        const rows = Array.from(allRows);
        for (let i = 0; i < rows.length; i++) {
            const rowTop = rows[i].getBoundingClientRect().top;
            if (rowTop > englandTop && rowTop < nextTop) {
                indices.push(i);
            }
        }
        return indices;
    }""")

    # Re-query the rows as handles and pick the England ones by index; the
    # bounds check covers a table that shrank between the two calls.
    all_rows = await page.query_selector_all(_MATCH_ROW_SELECTOR)
    england_rows = [all_rows[i] for i in indices if i < len(all_rows)]
    return england_rows, len(all_rows)


async def _read_match_row(row):
    """Extract one match from a row element.

    Returns a dict with ``minute``, ``clock``, ``home_team``, ``away_team``,
    ``home_score``, ``away_score``, ``h_odds``, ``d_odds`` and ``a_odds``,
    or None when the row lacks a usable clock, both teams or exactly two
    score items. Odds are optional and default to empty strings.
    """
    time_el = await row.query_selector("div.clock-time")
    if not time_el:
        return None
    clock = (await time_el.inner_text()).strip()
    if ':' not in clock:
        return None
    try:
        minute = int(clock.split(':')[0])
    except ValueError:
        return None

    home_el = await row.query_selector("div.home-team")
    away_el = await row.query_selector("div.away-team")
    if not home_el or not away_el:
        return None

    score_items = await row.query_selector_all("div.score-item")
    if len(score_items) != 2:
        return None

    match = {
        'minute': minute,
        'clock': clock,
        'home_team': (await home_el.inner_text()).strip(),
        'away_team': (await away_el.inner_text()).strip(),
        'home_score': (await score_items[0].inner_text()).strip(),
        'away_score': (await score_items[1].inner_text()).strip(),
        'h_odds': '',
        'd_odds': '',
        'a_odds': '',
    }

    # 1X2 odds are a bonus: a failure here must not lose the result itself.
    try:
        odds_els = await row.query_selector_all("span.m-outcome-odds")
        if len(odds_els) >= 3:
            match['h_odds'] = (await odds_els[0].inner_text()).strip()
            match['d_odds'] = (await odds_els[1].inner_text()).strip()
            match['a_odds'] = (await odds_els[2].inner_text()).strip()
    except Exception as e:
        print(f"  Could not read odds: {e}")
    return match


async def _run_cycle(page, cycle, saved_matches, last_saved):
    """Run one scrape cycle; return the seconds to wait before the next one."""
    # STEP 1: Navigate to the vFootball page.
    try:
        await page.goto(URL, wait_until='domcontentloaded', timeout=45000)
        await asyncio.sleep(5)
    except Exception:
        print("  Page load timed out, trying to continue...")

    # STEP 2: Switch the league view from scheduled to live matches.
    if not await _click_live_betting(page):
        print("  WARNING: Could not find in-page Live Betting toggle!")
        # Debug: screenshot to see current state
        if cycle <= 10:
            await page.screenshot(path=f"debug_livebtn_fail_{cycle}.png")
        return CHECK_INTERVAL
    await asyncio.sleep(5)

    # No need to click vFootball or England - the URL already targets the
    # league; the toggle only switches between scheduled and live matches.

    # STEP 4: Wait for the England Virtual header (10 attempts, 10s apart).
    if not await _wait_for_england(page, cycle):
        print("  No England Virtual matches found this cycle")
        return CHECK_INTERVAL

    # STEP 5: Extract ONLY the England Virtual match rows.
    england_rows, total_rows = await _find_england_rows(page)
    print(f"  Found {len(england_rows)} England Virtual match rows (filtered from {total_rows} total)")
    if len(england_rows) < 10:
        print(f"  Less than 10 England matches, waiting...")
        return 10

    saved_this_cycle = 0
    for row in england_rows:
        try:
            match = await _read_match_row(row)
            if match is None:
                continue
            home_team, away_team = match['home_team'], match['away_team']

            if match['minute'] < 84:
                print(f"  {home_team} {match['home_score']}:{match['away_score']} {away_team} @ {match['clock']} (waiting...)")
                continue

            # Unique ID including the current hour so the same fixture can
            # be saved again in a different round.
            now = datetime.now()
            fixture = f"{home_team}_{away_team}"
            match_id = f"{fixture}_{now.strftime('%Y%m%d%H')}"
            if _already_saved(match_id, fixture, now, saved_matches, last_saved):
                continue

            # Save at minute 84+ if not already saved
            save_result(home_team, away_team, match['home_score'], match['away_score'],
                        match['h_odds'], match['d_odds'], match['a_odds'])
            saved_matches.add(match_id)
            last_saved[fixture] = now
            saved_this_cycle += 1
        except Exception as e:
            # Keep going with the remaining rows, but say what went wrong.
            print(f"  Skipping match row: {e}")

    if saved_this_cycle > 0:
        print(f"  Saved {saved_this_cycle} new results this cycle")
        # Sync to cloud after every successful harvest
        sync_to_cloud()
    return CHECK_INTERVAL


async def run_monitor():
    """Main monitoring loop - runs until the process is stopped.

    Restores earlier history via pull_from_cloud(), rebuilds the set of
    already saved matches from CSV_FILE, launches headless Chromium and then
    repeats forever: load URL, click the in-page "Live Betting" toggle, wait
    for the "England Virtual" header, and save every England match that has
    reached minute 84 and was not saved before. New results are synced to
    the cloud right away; every 20th cycle triggers a backup sync.

    An exception inside a cycle is printed and the loop continues with the
    next cycle.
    """
    # 1. Pull latest data from Hugging Face Dataset if configured
    pull_from_cloud()

    # 2. Load already saved matches from CSV (it has timestamps)
    saved_matches, last_saved = _load_saved_matches()

    async with async_playwright() as p:
        print("[Monitor] Launching headless browser...")
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )
        page = await context.new_page()

        print("[Monitor] Starting 24/7 monitoring loop...")
        print(f"[Monitor] Checking every {CHECK_INTERVAL} seconds")
        print(f"[Monitor] Saving to: {RESULTS_FILE} + {CSV_FILE}\n")

        cycle = 0
        while True:
            cycle += 1
            delay = CHECK_INTERVAL
            try:
                print(f"\n--- Cycle {cycle} | {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---")
                delay = await _run_cycle(page, cycle, saved_matches, last_saved)
            except Exception as e:
                print(f"  Error in cycle {cycle}: {str(e)}")

            # Periodic backup sync; runs on every 20th cycle no matter how
            # the cycle ended (early exits used to skip it).
            if cycle % 20 == 0:
                sync_to_cloud()

            print(f"  Next check in {delay}s...")
            await asyncio.sleep(delay)


if __name__ == '__main__':
    banner_rule = "=" * 50
    for banner_line in (
        banner_rule,
        "VIRTUAL FOOTBALL CLOUD MONITOR",
        "Lightweight Playwright-based harvester",
        banner_rule,
    ):
        print(banner_line)

    # Web server runs in a daemon thread (Port 7860 for HuggingFace), so it
    # ends together with the main thread.
    web_thread = threading.Thread(
        target=app.run,
        kwargs={'host': '0.0.0.0', 'port': 7860},
        daemon=True,
    )
    web_thread.start()

    # The monitor loop occupies the main thread.
    asyncio.run(run_monitor())