"""
CLOUD MONITOR — Lightweight monitor for Hugging Face Spaces
Uses Playwright (headless Chromium) instead of Selenium+Firefox.
Replicates the exact navigation flow of the local monitor.py.
"""
import os
import csv
import time
import asyncio
from datetime import datetime
try:
from playwright.async_api import async_playwright
except ImportError:
print("Installing playwright...")
os.system("pip install playwright && playwright install chromium")
from playwright.async_api import async_playwright
from flask import Flask, send_file
import threading
import asyncio
from huggingface_hub import HfApi, hf_hub_download
# --- Cloud Storage Settings ---
# Both values are read from the environment; each is None when its variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset repo id in "your-username/your-dataset-name" form (e.g., "Teatop/sporty-data")
DATASET_ID = os.environ.get("HF_DATASET")
def sync_to_cloud():
    """Upload the local result files to the configured Hugging Face dataset.

    Does nothing unless both HF_TOKEN and DATASET_ID are set. CSV_FILE and
    RESULTS_FILE (kept for legacy support) are each uploaded under their own
    name, and a file that does not exist locally yet is skipped rather than
    aborting the whole sync. Upload errors are caught and printed so that a
    failed sync never stops the caller.
    """
    if not HF_TOKEN or not DATASET_ID:
        return
    try:
        api = HfApi(token=HF_TOKEN)
        uploaded = 0
        for local_file in (CSV_FILE, RESULTS_FILE):
            # Nothing harvested yet (e.g. a backup sync before the first
            # saved match): there is no file to push, which is not an error.
            if not os.path.exists(local_file):
                continue
            api.upload_file(
                path_or_fileobj=local_file,
                path_in_repo=local_file,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
            uploaded += 1
        if uploaded:
            print(" [CLOUD] Sync complete! Data is safe in your Dataset.")
    except Exception as e:
        # Best-effort by design: report the failure and keep running.
        print(f" [CLOUD] Sync error: {str(e)}")
def pull_from_cloud():
    """Download previously synced history from the Hugging Face dataset.

    Does nothing unless both HF_TOKEN and DATASET_ID are set. CSV_FILE and
    RESULTS_FILE are fetched independently into the current directory, so a
    file missing from the dataset does not prevent the other from being
    restored. The download is attempted whether or not a local copy exists,
    because the point is to restore files that a fresh container lacks.
    Failures are printed and never raised.
    """
    if not HF_TOKEN or not DATASET_ID:
        return
    print(f"[Monitor] Pulling history from {DATASET_ID}...")
    restored = False
    for filename in (CSV_FILE, RESULTS_FILE):
        try:
            hf_hub_download(
                repo_id=DATASET_ID,
                filename=filename,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
                local_dir_use_symlinks=False,
            )
            restored = True
        except Exception as e:
            # Best-effort: a missing file, a bad token or a network error all
            # mean this file starts empty. Say which one and why.
            print(f"[Monitor] Could not restore {filename} from cloud: {e}")
    if restored:
        print("[Monitor] History successfully restored from cloud!")
    else:
        print("[Monitor] No cloud history found, starting fresh.")
# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the landing page text: status line, download note and debug link label."""
    landing_page = '''
Professor's Cloud Monitor
🧠 Virtual Football Cloud Monitor 👽
Status: Running 24/7
DOWNLOAD MATCH_HISTORY.CSV
Note: Download your data daily. Free Spaces reset periodically!
VIEW DEBUG SCREENSHOT
'''
    return landing_page
@app.route('/download')
def download():
    """Send the collected match history CSV as a file attachment.

    Returns a plain-text notice instead when the CSV has not been created yet.
    """
    # Use the shared CSV_FILE constant so this route always serves the file
    # the monitor writes. The path is made absolute because Flask resolves
    # relative send_file paths against the application root, while the
    # monitor writes (and this check looks) relative to the working directory.
    csv_path = os.path.abspath(CSV_FILE)
    if os.path.exists(csv_path):
        return send_file(csv_path, as_attachment=True)
    return "No data collected yet!"
@app.route('/debug')
def debug():
    """Send the most recently written debug screenshot as a PNG image.

    Looks in the current working directory for files named ``debug_*.png``.
    Returns a plain-text notice when none exist yet.
    """
    screenshots = [
        name for name in os.listdir('.')
        if name.startswith('debug_') and name.endswith('.png')
    ]
    if not screenshots:
        return "No debug screenshots yet. Wait for cycle 1."
    # Pick by modification time, not by name: lexicographic order would rank
    # "..._9.png" after "..._10.png" and always prefer one filename family
    # over the other, so the newest capture could be hidden.
    latest = max(screenshots, key=os.path.getmtime)
    # Absolute path so Flask does not re-resolve it against the app root.
    return send_file(os.path.abspath(latest), mimetype='image/png')
# Output files, both written relative to the working directory.
CSV_FILE = "match_history.csv"
RESULTS_FILE = "england_virtual_results.txt"
# Seconds between checks; 15s is short enough to catch the 84-90min window.
CHECK_INTERVAL = 15
# vFootball page the monitor loads at the start of every cycle.
URL = (
    "https://www.sportybet.com/ng/sport/vFootball/"
    "sv:category:202120002/sv:league:2"
)
def save_result(home_team, away_team, home_score, away_score, h_odds='', d_odds='', a_odds=''):
    """Record one match result in the text log and the CSV history.

    A "home score:score away" line is appended to RESULTS_FILE. A timestamped
    row carrying the teams, scores and the three odds values is appended to
    CSV_FILE, preceded by a header row when CSV_FILE did not exist before
    this call. A confirmation line is printed at the end.
    """
    with open(RESULTS_FILE, 'a', encoding='utf-8') as text_log:
        text_log.write(f"{home_team} {home_score}:{away_score} {away_team}\n")

    # Must be decided before open(): append mode creates the file.
    needs_header = not os.path.exists(CSV_FILE)
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as csv_log:
        out = csv.writer(csv_log)
        if needs_header:
            out.writerow(['timestamp', 'home_team', 'away_team', 'home_score', 'away_score',
                          'h_odds', 'd_odds', 'a_odds'])
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        record = [stamp, home_team, away_team, home_score, away_score,
                  h_odds, d_odds, a_odds]
        out.writerow(record)

    print(f" [SAVED] {home_team} {home_score}:{away_score} {away_team} | Odds: H={h_odds} D={d_odds} A={a_odds}")
# JavaScript executed in the page by _click_live_betting_toggle (method 5).
# Walks every element under <body> and clicks the first one whose text
# contains "Live Betting" and which is a league-title / league-tab element or
# has role "text leaf". Returns a description string, or null if none matched.
_CLICK_LIVE_BETTING_JS = """() => {
// Find all elements containing "Live Betting"
const walker = document.createTreeWalker(
document.body, NodeFilter.SHOW_ELEMENT
);
while (walker.nextNode()) {
const el = walker.currentNode;
const text = el.textContent || '';
const cls = el.className || '';
// Target elements inside league-title or with role text
if (text.includes('Live Betting') &&
(cls.includes('league-title') || el.getAttribute('role') === 'text leaf' ||
cls.includes('league-tab'))) {
el.click();
return 'clicked: ' + cls;
}
}
return null;
}"""

# JavaScript executed in the page by run_monitor. Returns the indices (within
# the list of all vFootball match rows) of the rows positioned vertically
# between the "England Virtual" league header and the next league header.
_ENGLAND_ROW_INDICES_JS = """() => {
// Find all league headers and match rows
const headers = document.querySelectorAll('div.m-table-cell.league');
const allRows = document.querySelectorAll('div.m-table-row.m-content-row.match-row.vFootball-row');
// Find the England Virtual header position
let englandHeader = null;
let nextHeader = null;
let foundEngland = false;
for (const h of headers) {
if (h.textContent.includes('England Virtual')) {
englandHeader = h;
foundEngland = true;
} else if (foundEngland && !nextHeader) {
nextHeader = h;
}
}
if (!englandHeader) return [];
// Get the vertical position of England header and next header
const englandTop = englandHeader.getBoundingClientRect().top;
const nextTop = nextHeader ? nextHeader.getBoundingClientRect().top : 999999;
// Filter rows that are between England header and next header
const indices = [];
const rows = Array.from(allRows);
for (let i = 0; i < rows.length; i++) {
const rowTop = rows[i].getBoundingClientRect().top;
if (rowTop > englandTop && rowTop < nextTop) {
indices.push(i);
}
}
return indices;
}"""


def _load_saved_match_ids():
    """Return the set of "home_away_YYYYMMDDHH" ids for rows already in CSV_FILE."""
    saved_matches = set()
    if not os.path.exists(CSV_FILE):
        return saved_matches
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            try:
                # Create ID from teams + hour of match
                dt = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
                hour_id = dt.strftime('%Y%m%d%H')
                saved_matches.add(f"{row['home_team']}_{row['away_team']}_{hour_id}")
            except (KeyError, TypeError, ValueError):
                # Skip rows with a missing column (KeyError), a short row whose
                # timestamp is None (TypeError) or an unparsable timestamp
                # (ValueError). Anything else is a real error and propagates.
                continue
    return saved_matches


async def _click_live_betting_toggle(page):
    """Try five strategies in turn to click the in-page "Live Betting" toggle.

    The page has TWO elements with "Live Betting" text: a top nav bar link
    that navigates AWAY (wrong) and an in-page toggle that shows live matches
    (correct). Each strategy is best-effort; any error simply moves on to the
    next one. Returns True as soon as one strategy clicked something, False
    when all five failed.
    """
    # Method 1: first div with role='text leaf', if its text mentions Live Betting
    try:
        el = await page.query_selector("div[role='text leaf']")
        if el:
            text = await el.inner_text()
            if 'Live Betting' in text:
                await el.evaluate("el => el.click()")
                print(" Clicked Live Betting (method 1: role text leaf)")
                return True
    except Exception:
        pass

    # Method 2: inside league-title with data-cms-key
    try:
        el = await page.query_selector("div.league-title span[data-cms-key='live_betting']")
        if el:
            await el.evaluate("el => el.click()")
            print(" Clicked Live Betting (method 2: league-title cms-key)")
            return True
    except Exception:
        pass

    # Method 3: inside league-title with text
    try:
        el = await page.query_selector("div.league-title span:has-text('Live Betting')")
        if el:
            await el.evaluate("el => el.click()")
            print(" Clicked Live Betting (method 3: league-title span text)")
            return True
    except Exception:
        pass

    # Method 4: target the element whose text carries a number, e.g.
    # "Live Betting 39"; the top nav entry just says "Live Betting".
    # NOTE(review): ":has-text()" also matches ancestors of the toggle
    # (possibly <html>/<body>), whose text would contain digits too, so this
    # may click a container rather than the toggle itself. Confirm against
    # the live page before relying on this fallback.
    try:
        elements = await page.query_selector_all("*:has-text('Live Betting')")
        for el in elements:
            text = (await el.inner_text()).strip()
            tag = await el.evaluate("el => el.tagName")
            if 'Live Betting' in text and any(c.isdigit() for c in text) and tag != 'NAV':
                # className of the candidate itself; skip nav/header widgets
                el_class = await el.evaluate("el => el.className || ''")
                if 'nav' not in el_class.lower() and 'header' not in el_class.lower():
                    await el.evaluate("el => el.click()")
                    print(f" Clicked Live Betting (method 4: element with number '{text[:30]}')")
                    return True
    except Exception:
        pass

    # Method 5: Use JavaScript to find and click (nuclear option)
    try:
        clicked = await page.evaluate(_CLICK_LIVE_BETTING_JS)
        if clicked:
            print(f" Clicked Live Betting (method 5: JS walker - {clicked})")
            return True
    except Exception:
        pass

    return False


async def run_monitor():
    """Main monitoring loop — mirrors local monitor.py navigation exactly.

    Restores history from the cloud (if configured) and rebuilds the set of
    already-saved match ids from CSV_FILE. It then launches headless Chromium
    and loops forever. Each cycle loads URL, clicks the in-page Live Betting
    toggle, waits for the "England Virtual" section and saves every England
    row whose clock shows minute 84 or later and whose id
    (home_away_YYYYMMDDHH) has not been saved yet. New results are synced to
    the cloud right away, with an extra backup sync every 20th cycle.

    Errors inside a cycle are printed and the loop carries on; the coroutine
    only ends when it is cancelled or an error escapes the per-cycle handler.
    """
    # 1. Pull latest data from Hugging Face Dataset if configured
    pull_from_cloud()

    # Load already saved matches from CSV (it has timestamps)
    saved_matches = _load_saved_match_ids()
    print(f"[Monitor] {len(saved_matches)} unique hourly matches in history")

    async with async_playwright() as p:
        print("[Monitor] Launching headless browser...")
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )
        page = await context.new_page()
        print("[Monitor] Starting 24/7 monitoring loop...")
        print(f"[Monitor] Checking every {CHECK_INTERVAL} seconds")
        print(f"[Monitor] Saving to: {RESULTS_FILE} + {CSV_FILE}\n")

        cycle = 0
        while True:
            cycle += 1
            try:
                print(f"\n--- Cycle {cycle} | {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---")

                # ====================================================
                # STEP 1: Navigate to the vFootball page
                # ====================================================
                try:
                    await page.goto(URL, wait_until='domcontentloaded', timeout=45000)
                    await asyncio.sleep(5)
                except Exception:
                    # A slow load may still have rendered enough to proceed.
                    print(" Page load timed out, trying to continue...")

                # ====================================================
                # STEP 2: Click the in-page "Live Betting" toggle
                # (NOT the "Live Betting" link in the top nav bar).
                # ====================================================
                live_clicked = await _click_live_betting_toggle(page)
                if not live_clicked:
                    print(" WARNING: Could not find in-page Live Betting toggle!")
                    # Debug: screenshot to see current state (first 10 cycles only)
                    if cycle <= 10:
                        await page.screenshot(path=f"debug_livebtn_fail_{cycle}.png")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue
                await asyncio.sleep(5)

                # No need to click vFootball or England — the URL already
                # targets the correct league. The Live Betting toggle just
                # switches from "scheduled" to "live" matches in that league.

                # ====================================================
                # STEP 3: Wait for England Virtual matches — RETRY LOOP
                # Up to max_attempts tries, 10 seconds apart.
                # ====================================================
                max_attempts = 10
                england_found = False
                for attempt in range(max_attempts):
                    try:
                        header = await page.query_selector(
                            "div.m-table-cell.league:has-text('England Virtual')"
                        )
                        if header:
                            print(f" Found 'England Virtual' header! (attempt {attempt + 1})")
                            england_found = True
                            break
                    except Exception:
                        pass
                    # Debug screenshot on first attempt (first 5 cycles only)
                    if attempt == 0 and cycle <= 5:
                        await page.screenshot(path=f"debug_cycle{cycle}_attempt{attempt}.png")
                        body = await page.inner_text("body")
                        preview = body[:400].replace('\n', ' | ')
                        print(f" [DEBUG] Content: {preview}")
                    print(f" England Virtual not found yet (attempt {attempt + 1}/{max_attempts}), waiting 10s...")
                    await asyncio.sleep(10)

                if not england_found:
                    print(" No England Virtual matches found this cycle")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue

                # ====================================================
                # STEP 4: Extract ONLY England Virtual match rows
                # The page shows matches from ALL leagues. We keep only
                # the rows between the "England Virtual" header and the
                # next league header.
                # ====================================================
                match_rows = await page.evaluate(_ENGLAND_ROW_INDICES_JS)
                # Now get only the England Virtual rows by index
                all_rows = await page.query_selector_all(
                    "div.m-table-row.m-content-row.match-row.vFootball-row"
                )
                england_rows = [all_rows[i] for i in match_rows if i < len(all_rows)]
                print(f" Found {len(england_rows)} England Virtual match rows (filtered from {len(all_rows)} total)")

                if len(england_rows) < 10:
                    print(" Less than 10 England matches, waiting...")
                    await asyncio.sleep(10)
                    continue

                matches_saved_this_cycle = 0
                for row in england_rows:
                    try:
                        # Get match time
                        time_el = await row.query_selector("div.clock-time")
                        if not time_el:
                            continue
                        match_time = (await time_el.inner_text()).strip()
                        if not match_time or ':' not in match_time:
                            continue
                        current_minute = int(match_time.split(':')[0])

                        # Get teams
                        home_el = await row.query_selector("div.home-team")
                        away_el = await row.query_selector("div.away-team")
                        if not home_el or not away_el:
                            continue
                        home_team = (await home_el.inner_text()).strip()
                        away_team = (await away_el.inner_text()).strip()

                        # Unique ID including the current hour so we can save
                        # the same fixture in different rounds
                        hour_id = datetime.now().strftime('%Y%m%d%H')
                        match_id = f"{home_team}_{away_team}_{hour_id}"

                        # Get scores
                        score_items = await row.query_selector_all("div.score-item")
                        if len(score_items) != 2:
                            continue
                        home_score = (await score_items[0].inner_text()).strip()
                        away_score = (await score_items[1].inner_text()).strip()

                        # Get 1X2 odds (optional: left blank when unavailable)
                        h_odds = d_odds = a_odds = ''
                        try:
                            odds_els = await row.query_selector_all("span.m-outcome-odds")
                            if len(odds_els) >= 3:
                                h_odds = (await odds_els[0].inner_text()).strip()
                                d_odds = (await odds_els[1].inner_text()).strip()
                                a_odds = (await odds_els[2].inner_text()).strip()
                        except Exception:
                            pass

                        # Save at minute 84+ if not already saved
                        if current_minute >= 84 and match_id not in saved_matches:
                            save_result(home_team, away_team, home_score, away_score,
                                        h_odds, d_odds, a_odds)
                            saved_matches.add(match_id)
                            matches_saved_this_cycle += 1
                        elif current_minute < 84:
                            print(f" {home_team} {home_score}:{away_score} {away_team} @ {match_time} (waiting...)")
                    except Exception:
                        # One unreadable row must not abort the other rows.
                        continue

                if matches_saved_this_cycle > 0:
                    print(f" Saved {matches_saved_this_cycle} new results this cycle")
                    # 2. Sync to cloud after every successful harvest
                    sync_to_cloud()
            except Exception as e:
                print(f" Error in cycle {cycle}: {str(e)}")

            # Wait before next check
            if cycle % 20 == 0:  # Periodic backup sync every 20th cycle
                sync_to_cloud()
            print(f" Next check in {CHECK_INTERVAL}s...")
            await asyncio.sleep(CHECK_INTERVAL)
if __name__ == '__main__':
    banner_rule = "=" * 50
    print(banner_rule)
    print("VIRTUAL FOOTBALL CLOUD MONITOR")
    print("Lightweight Playwright-based harvester")
    print(banner_rule)

    # Serve the Flask routes from a daemon thread (Port 7860 for HuggingFace)
    # so the web server never keeps the process alive on its own.
    web_thread = threading.Thread(
        target=app.run,
        kwargs={'host': '0.0.0.0', 'port': 7860},
        daemon=True,
    )
    web_thread.start()

    # Run the monitor loop on the main thread
    asyncio.run(run_monitor())