"""
CLOUD MONITOR — Lightweight monitor for Hugging Face Spaces

Uses Playwright (headless Chromium) instead of Selenium+Firefox.
Replicates the exact navigation flow of the local monitor.py.

The module does three things:
  * serves a tiny Flask status page (download CSV / view debug screenshot),
  * scrapes the vFootball "England Virtual" live table in an endless loop,
  * mirrors the harvested files to a Hugging Face Dataset when HF_TOKEN and
    HF_DATASET are set in the environment.
"""

import os
import csv
import time
import asyncio
import threading
from datetime import datetime

try:
    from playwright.async_api import async_playwright
except ImportError:
    # Best-effort self-install for environments where the image was built
    # without Playwright.
    print("Installing playwright...")
    os.system("pip install playwright && playwright install chromium")
    from playwright.async_api import async_playwright

from flask import Flask, send_file
from huggingface_hub import HfApi, hf_hub_download

# --- Cloud Storage Settings ---
HF_TOKEN = os.getenv("HF_TOKEN")
# Format: "your-username/your-dataset-name" (e.g., "Teatop/sporty-data")
DATASET_ID = os.getenv("HF_DATASET")

# --- Local files and scraping settings ---
RESULTS_FILE = "england_virtual_results.txt"
CSV_FILE = "match_history.csv"
CHECK_INTERVAL = 15  # Check every 15s to catch the 84-90min window
URL = "https://www.sportybet.com/ng/sport/vFootball/sv:category:202120002/sv:league:2"


def sync_to_cloud():
    """Upload the local CSV (and the legacy txt file) to the Hugging Face Dataset.

    Does nothing when HF_TOKEN / HF_DATASET are not configured or when no CSV
    has been written yet. Upload errors are reported and swallowed so a
    network problem never stops the monitor.
    """
    if not HF_TOKEN or not DATASET_ID:
        return
    if not os.path.exists(CSV_FILE):
        # Nothing harvested yet; avoid a guaranteed upload error.
        return
    try:
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=CSV_FILE,
            path_in_repo=CSV_FILE,
            repo_id=DATASET_ID,
            repo_type="dataset",
        )
        # Also sync the txt file for legacy support
        if os.path.exists(RESULTS_FILE):
            api.upload_file(
                path_or_fileobj=RESULTS_FILE,
                path_in_repo=RESULTS_FILE,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
        print(" [CLOUD] Sync complete! Data is safe in your Dataset.")
    except Exception as e:
        print(f" [CLOUD] Sync error: {str(e)}")


def pull_from_cloud():
    """Download the latest history files from the Hugging Face Dataset on startup.

    Each file is fetched independently: a missing txt file must not hide a
    successfully restored CSV, and the txt file is requested even when it does
    not exist locally (on a fresh container it never does).
    """
    if not HF_TOKEN or not DATASET_ID:
        return
    print(f"[Monitor] Pulling history from {DATASET_ID}...")
    restored_any = False
    for filename in (CSV_FILE, RESULTS_FILE):
        try:
            hf_hub_download(
                repo_id=DATASET_ID,
                filename=filename,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
                local_dir_use_symlinks=False,
            )
            restored_any = True
        except Exception as e:
            # Best effort: a missing file or network error means we simply
            # start without that piece of history.
            print(f"[Monitor] Could not restore {filename}: {e}")
    if restored_any:
        print("[Monitor] History successfully restored from cloud!")
    else:
        print("[Monitor] No cloud history found, starting fresh.")


app = Flask(__name__)


@app.route('/')
def index():
    """Render the status landing page with links to the download and debug routes."""
    # NOTE(review): the reviewed copy of this page contained only the text,
    # without markup. The anchors below point at the /download and /debug
    # routes defined in this module — confirm against the intended design.
    return '''
    <html>
    <head><title>Professor's Cloud Monitor</title></head>
    <body>
    <h1>🧠 Virtual Football Cloud Monitor 👽</h1>
    <p>Status: <b>Running 24/7</b></p>
    <p><a href="/download">DOWNLOAD MATCH_HISTORY.CSV</a></p>
    <p>Note: Download your data daily. Free Spaces reset periodically!</p>
    <p><a href="/debug">VIEW DEBUG SCREENSHOT</a></p>
    </body>
    </html>
    '''


@app.route('/download')
def download():
    """Send the harvested CSV as an attachment, or a plain message if none exists."""
    if os.path.exists(CSV_FILE):
        # Absolute path so the file served is the one the existence check saw.
        return send_file(os.path.abspath(CSV_FILE), as_attachment=True)
    return "No data collected yet!"


@app.route('/debug')
def debug():
    """Send the most recently written debug screenshot."""
    screenshots = [
        f for f in os.listdir('.')
        if f.startswith('debug_') and f.endswith('.png')
    ]
    if not screenshots:
        return "No debug screenshots yet. Wait for cycle 1."
    # Newest by modification time: file names do not sort chronologically
    # ("..._9.png" sorts after "..._10.png").
    latest = max(screenshots, key=os.path.getmtime)
    return send_file(os.path.abspath(latest), mimetype='image/png')


def save_result(home_team, away_team, home_score, away_score,
                h_odds='', d_odds='', a_odds=''):
    """Save match result to both txt and csv files.

    The txt file gets one "<home> <hs>:<as> <away>" line; the CSV gets a
    timestamped row, with a header row written when the file is first created.
    """
    with open(RESULTS_FILE, 'a', encoding='utf-8') as f:
        f.write(f"{home_team} {home_score}:{away_score} {away_team}\n")

    file_exists = os.path.exists(CSV_FILE)
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(['timestamp', 'home_team', 'away_team',
                             'home_score', 'away_score',
                             'h_odds', 'd_odds', 'a_odds'])
        writer.writerow([
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            home_team, away_team, home_score, away_score,
            h_odds, d_odds, a_odds
        ])

    print(f" [SAVED] {home_team} {home_score}:{away_score} {away_team} | Odds: H={h_odds} D={d_odds} A={a_odds}")


def _load_saved_matches():
    """Return the set of "<home>_<away>_<YYYYMMDDHH>" ids already present in the CSV."""
    saved = set()
    if not os.path.exists(CSV_FILE):
        return saved
    with open(CSV_FILE, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            try:
                # Create ID from teams + hour of match
                dt = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
                hour_id = dt.strftime('%Y%m%d%H')
                saved.add(f"{row['home_team']}_{row['away_team']}_{hour_id}")
            except (KeyError, TypeError, ValueError):
                # Malformed or truncated row: skip it, keep the rest.
                continue
    return saved


async def _click_live_betting(page):
    """Click the in-page "Live Betting" toggle; return True when a click was issued.

    The page has TWO elements with "Live Betting" text:
      - Top nav bar link → navigates AWAY (WRONG!)
      - In-page toggle   → shows live matches (CORRECT!)

    The local monitor targets it with these XPATHs:
      //div[@role='text leaf' and contains(text(),'Live Betting')]
      //div[contains(@class,'league-title')]//span[@data-cms-key='live_betting']
      //div[contains(@class,'league-title')]//span[contains(text(),'Live Betting')]

    Five strategies are tried in order. Each one is best-effort: any error
    just moves on to the next strategy.
    """
    # Method 1: role='text leaf' containing the text. Every candidate is
    # inspected, since the first such div need not be the toggle.
    try:
        for el in await page.query_selector_all("div[role='text leaf']"):
            if 'Live Betting' in await el.inner_text():
                await el.evaluate("el => el.click()")
                print(" Clicked Live Betting (method 1: role text leaf)")
                return True
    except Exception:
        pass

    # Method 2: inside league-title with data-cms-key
    try:
        el = await page.query_selector("div.league-title span[data-cms-key='live_betting']")
        if el:
            await el.evaluate("el => el.click()")
            print(" Clicked Live Betting (method 2: league-title cms-key)")
            return True
    except Exception:
        pass

    # Method 3: inside league-title with text
    try:
        el = await page.query_selector("div.league-title span:has-text('Live Betting')")
        if el:
            await el.evaluate("el => el.click()")
            print(" Clicked Live Betting (method 3: league-title span text)")
            return True
    except Exception:
        pass

    # Method 4: the in-page toggle carries a counter ("Live Betting 39"),
    # while the top nav just says "Live Betting".
    try:
        elements = await page.query_selector_all("*:has-text('Live Betting')")
        for el in elements:
            text = (await el.inner_text()).strip()
            label = ' '.join(text.split())
            # :has-text also matches every ancestor (<html>, <body>, ...),
            # whose text contains the label plus the rest of the page. Only a
            # short label with a digit can be the toggle itself.
            if 'Live Betting' not in label or len(label) > 30:
                continue
            if not any(c.isdigit() for c in label):
                continue
            tag = await el.evaluate("el => el.tagName")
            if tag == 'NAV':
                continue
            own_class = await el.evaluate("el => el.className || ''")
            if 'nav' not in own_class.lower() and 'header' not in own_class.lower():
                await el.evaluate("el => el.click()")
                print(f" Clicked Live Betting (method 4: element with number '{text[:30]}')")
                return True
    except Exception:
        pass

    # Method 5: Use JavaScript to find and click (nuclear option)
    try:
        clicked = await page.evaluate("""() => {
            // Find all elements containing "Live Betting"
            const walker = document.createTreeWalker(
                document.body, NodeFilter.SHOW_ELEMENT
            );
            while (walker.nextNode()) {
                const el = walker.currentNode;
                const text = el.textContent || '';
                const cls = el.className || '';
                // Target elements inside league-title or with role text
                if (text.includes('Live Betting') &&
                    (cls.includes('league-title') ||
                     el.getAttribute('role') === 'text leaf' ||
                     cls.includes('league-tab'))) {
                    el.click();
                    return 'clicked: ' + cls;
                }
            }
            return null;
        }""")
        if clicked:
            print(f" Clicked Live Betting (method 5: JS walker - {clicked})")
            return True
    except Exception:
        pass

    return False


async def _wait_for_england_header(page, cycle, max_attempts=10):
    """Poll for the 'England Virtual' league header; return True once it is found.

    Retries up to ``max_attempts`` times, 10 seconds apart. On the first
    attempt of the first five cycles a debug screenshot and a preview of the
    page text are produced.
    """
    for attempt in range(max_attempts):
        try:
            header = await page.query_selector(
                "div.m-table-cell.league:has-text('England Virtual')"
            )
            if header:
                print(f" Found 'England Virtual' header! (attempt {attempt + 1})")
                return True
        except Exception:
            pass

        # Debug screenshot on first attempt
        if attempt == 0 and cycle <= 5:
            await page.screenshot(path=f"debug_cycle{cycle}_attempt{attempt}.png")
            body = await page.inner_text("body")
            preview = body[:400].replace('\n', ' | ')
            print(f" [DEBUG] Content: {preview}")

        print(f" England Virtual not found yet (attempt {attempt + 1}/{max_attempts}), waiting 10s...")
        await asyncio.sleep(10)
    return False


async def _collect_england_rows(page):
    """Return (england_rows, total_row_count) for the live table.

    The page shows matches from ALL leagues. Only the rows positioned between
    the "England Virtual" header and the next league header are kept.
    """
    match_rows = await page.evaluate("""() => {
        // Find all league headers and match rows
        const headers = document.querySelectorAll('div.m-table-cell.league');
        const allRows = document.querySelectorAll('div.m-table-row.m-content-row.match-row.vFootball-row');

        // Find the England Virtual header position
        let englandHeader = null;
        let nextHeader = null;
        let foundEngland = false;
        for (const h of headers) {
            if (h.textContent.includes('England Virtual')) {
                englandHeader = h;
                foundEngland = true;
            } else if (foundEngland && !nextHeader) {
                nextHeader = h;
            }
        }
        if (!englandHeader) return [];

        // Get the vertical position of England header and next header
        const englandTop = englandHeader.getBoundingClientRect().top;
        const nextTop = nextHeader ? nextHeader.getBoundingClientRect().top : 999999;

        // Filter rows that are between England header and next header
        const indices = [];
        const rows = Array.from(allRows);
        for (let i = 0; i < rows.length; i++) {
            const rowTop = rows[i].getBoundingClientRect().top;
            if (rowTop > englandTop && rowTop < nextTop) {
                indices.push(i);
            }
        }
        return indices;
    }""")

    # Now get only the England Virtual rows by index
    all_rows = await page.query_selector_all(
        "div.m-table-row.m-content-row.match-row.vFootball-row"
    )
    england_rows = [all_rows[i] for i in match_rows if i < len(all_rows)]
    return england_rows, len(all_rows)


async def _read_match_row(row):
    """Extract one match row.

    Returns a tuple ``(minute, clock, home, away, home_score, away_score,
    h_odds, d_odds, a_odds)``, or None when the row lacks a clock, both team
    names, or exactly two score items.
    """
    # Get match time
    time_el = await row.query_selector("div.clock-time")
    if not time_el:
        return None
    match_time = (await time_el.inner_text()).strip()
    if not match_time or ':' not in match_time:
        return None
    current_minute = int(match_time.split(':')[0])

    # Get teams
    home_el = await row.query_selector("div.home-team")
    away_el = await row.query_selector("div.away-team")
    if not home_el or not away_el:
        return None
    home_team = (await home_el.inner_text()).strip()
    away_team = (await away_el.inner_text()).strip()

    # Get scores
    score_items = await row.query_selector_all("div.score-item")
    if len(score_items) != 2:
        return None
    home_score = (await score_items[0].inner_text()).strip()
    away_score = (await score_items[1].inner_text()).strip()

    # Get 1X2 odds (optional: left empty when they cannot be read)
    h_odds = d_odds = a_odds = ''
    try:
        odds_els = await row.query_selector_all("span.m-outcome-odds")
        if len(odds_els) >= 3:
            h_odds = (await odds_els[0].inner_text()).strip()
            d_odds = (await odds_els[1].inner_text()).strip()
            a_odds = (await odds_els[2].inner_text()).strip()
    except Exception:
        pass

    return (current_minute, match_time, home_team, away_team,
            home_score, away_score, h_odds, d_odds, a_odds)


async def run_monitor():
    """Main monitoring loop — mirrors local monitor.py navigation exactly.

    Restores history from the cloud, then loops forever: load the league
    page, switch it to live matches, wait for the England Virtual table,
    and save every match that has reached minute 84 and was not saved yet.
    """
    # 1. Pull latest data from Hugging Face Dataset if configured
    pull_from_cloud()

    # Load already saved matches from CSV (it has timestamps)
    saved_matches = _load_saved_matches()
    print(f"[Monitor] {len(saved_matches)} unique hourly matches in history")

    async with async_playwright() as p:
        print("[Monitor] Launching headless browser...")
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )
        page = await context.new_page()

        print("[Monitor] Starting 24/7 monitoring loop...")
        print(f"[Monitor] Checking every {CHECK_INTERVAL} seconds")
        print(f"[Monitor] Saving to: {RESULTS_FILE} + {CSV_FILE}\n")

        cycle = 0
        while True:
            cycle += 1
            try:
                print(f"\n--- Cycle {cycle} | {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---")

                # ============================================================
                # STEP 1: Navigate to the vFootball page (same URL as local)
                # ============================================================
                try:
                    await page.goto(URL, wait_until='domcontentloaded', timeout=45000)
                    await asyncio.sleep(5)
                except Exception:
                    print(" Page load timed out, trying to continue...")

                # ============================================================
                # STEP 2: Click the "Live Betting" TOGGLE inside the vFootball
                # page content — NOT the "Live Betting" in the top nav bar!
                # ============================================================
                if not await _click_live_betting(page):
                    print(" WARNING: Could not find in-page Live Betting toggle!")
                    # Debug: screenshot to see current state
                    if cycle <= 10:
                        await page.screenshot(path=f"debug_livebtn_fail_{cycle}.png")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue

                await asyncio.sleep(5)

                # No need to click vFootball or England — the URL already targets
                # the correct league. The Live Betting toggle just switches from
                # "scheduled" to "live" matches within that league.

                # ============================================================
                # STEP 4: Wait for England Virtual matches — RETRY LOOP
                # (up to 10 attempts, 10 seconds apart)
                # ============================================================
                if not await _wait_for_england_header(page, cycle):
                    print(" No England Virtual matches found this cycle")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue

                # ============================================================
                # STEP 5: Extract ONLY England Virtual match rows
                # ============================================================
                england_rows, total_rows = await _collect_england_rows(page)
                print(f" Found {len(england_rows)} England Virtual match rows (filtered from {total_rows} total)")

                if len(england_rows) < 10:
                    print(f" Less than 10 England matches, waiting...")
                    await asyncio.sleep(10)
                    continue

                matches_saved_this_cycle = 0
                for row in england_rows:
                    try:
                        parsed = await _read_match_row(row)
                        if parsed is None:
                            continue
                        (current_minute, match_time, home_team, away_team,
                         home_score, away_score, h_odds, d_odds, a_odds) = parsed

                        # Unique ID including the current hour so we can save
                        # the same fixture in different rounds.
                        # NOTE(review): a match still shown at minute 84+ when
                        # the wall-clock hour rolls over gets a new id and is
                        # saved a second time — confirm whether that matters.
                        hour_id = datetime.now().strftime('%Y%m%d%H')
                        match_id = f"{home_team}_{away_team}_{hour_id}"

                        # Save at minute 84+ if not already saved
                        if current_minute >= 84 and match_id not in saved_matches:
                            save_result(home_team, away_team, home_score, away_score,
                                        h_odds, d_odds, a_odds)
                            saved_matches.add(match_id)
                            matches_saved_this_cycle += 1
                        elif current_minute < 84:
                            print(f" {home_team} {home_score}:{away_score} {away_team} @ {match_time} (waiting...)")
                    except Exception as e:
                        # One bad row must not abort the rest, but report it
                        # instead of hiding the failure.
                        print(f" Skipping row: {e}")
                        continue

                if matches_saved_this_cycle > 0:
                    print(f" Saved {matches_saved_this_cycle} new results this cycle")
                    # 2. Sync to cloud after every successful harvest
                    sync_to_cloud()

            except Exception as e:
                print(f" Error in cycle {cycle}: {str(e)}")

            # Wait before next check
            if cycle % 20 == 0:
                # Periodic sync every ~5 mins as backup
                sync_to_cloud()

            print(f" Next check in {CHECK_INTERVAL}s...")
            await asyncio.sleep(CHECK_INTERVAL)


if __name__ == '__main__':
    print("=" * 50)
    print("VIRTUAL FOOTBALL CLOUD MONITOR")
    print("Lightweight Playwright-based harvester")
    print("=" * 50)

    # Start web server in background (Port 7860 for HuggingFace)
    def run_app():
        app.run(host='0.0.0.0', port=7860)

    threading.Thread(target=run_app, daemon=True).start()

    # Run monitor
    asyncio.run(run_monitor())