File size: 22,280 Bytes
dd75617
 
 
db68187
dd75617
 
 
 
 
 
 
 
 
 
 
 
 
 
fd70cd5
 
31bae6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd70cd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2af2917
 
 
 
fd70cd5
 
 
 
 
 
 
 
 
 
2af2917
 
db68187
2af2917
db68187
 
2af2917
dd75617
 
 
6e41a7d
dd75617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db68187
31bae6a
 
 
 
db68187
 
6e41a7d
 
 
 
 
 
 
 
 
 
 
 
 
dd75617
 
 
 
 
 
db68187
dd75617
 
 
 
 
 
 
 
 
 
 
 
 
db68187
 
 
fd70cd5
 
6b673cc
 
db68187
 
 
bfcf9fc
 
 
 
 
 
 
 
 
 
 
db68187
 
6b673cc
bfcf9fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db68187
bfcf9fc
db68187
 
bfcf9fc
db68187
 
 
bfcf9fc
db68187
 
 
bfcf9fc
 
db68187
 
bfcf9fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db68187
 
 
 
bfcf9fc
 
 
 
db68187
 
6b673cc
db68187
dd75617
bfcf9fc
 
 
db68187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd75617
 
 
 
db68187
88cc391
 
 
 
 
db68187
88cc391
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd75617
 
88cc391
 
dd75617
88cc391
 
db68187
88cc391
db68187
dd75617
88cc391
dd75617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e41a7d
 
 
 
dd75617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db68187
dd75617
 
 
 
31bae6a
 
dd75617
 
 
 
 
31bae6a
 
 
dd75617
 
 
 
 
 
 
 
 
db68187
fd70cd5
 
 
db68187
fd70cd5
db68187
fd70cd5
dd75617
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
"""
CLOUD MONITOR — Lightweight monitor for Hugging Face Spaces
Uses Playwright (headless Chromium) instead of Selenium+Firefox.
Replicates the exact navigation flow of the local monitor.py.
"""
import os
import csv
import time
import asyncio
from datetime import datetime

try:
    from playwright.async_api import async_playwright
except ImportError:
    print("Installing playwright...")
    os.system("pip install playwright && playwright install chromium")
    from playwright.async_api import async_playwright

from flask import Flask, send_file
import threading
import asyncio
from huggingface_hub import HfApi, hf_hub_download

# --- Cloud Storage Settings ---
# Both values come from the environment. When either one is unset,
# sync_to_cloud() and pull_from_cloud() return without doing anything.
# Token handed to the Hugging Face Hub client calls.
HF_TOKEN = os.getenv("HF_TOKEN")
# Dataset repo id, read from the HF_DATASET environment variable.
# Format: "your-username/your-dataset-name" (e.g., "Teatop/sporty-data")
DATASET_ID = os.getenv("HF_DATASET")

def sync_to_cloud():
    """Upload the local history files to the Hugging Face dataset.

    Does nothing when HF_TOKEN or DATASET_ID is unset, or when neither
    CSV_FILE nor RESULTS_FILE exists yet (for example, a periodic backup
    sync that fires before the first result has been saved). Each file
    that exists is uploaded under its own name. Upload errors are printed
    and swallowed so the monitoring loop keeps running.
    """
    if not HF_TOKEN or not DATASET_ID:
        return

    # Only upload what is actually on disk; the CSV is created lazily by
    # save_result(), so it may be missing early in a run.
    pending = [path for path in (CSV_FILE, RESULTS_FILE) if os.path.exists(path)]
    if not pending:
        return

    try:
        api = HfApi(token=HF_TOKEN)
        for path in pending:
            api.upload_file(
                path_or_fileobj=path,
                path_in_repo=path,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
        print("  [CLOUD] Sync complete! Data is safe in your Dataset.")
    except Exception as e:
        # Best-effort backup: report and carry on.
        print(f"  [CLOUD] Sync error: {e}")

def pull_from_cloud():
    """Download the saved history files from the Hugging Face dataset.

    Does nothing when HF_TOKEN or DATASET_ID is unset. CSV_FILE and
    RESULTS_FILE are fetched independently into the current directory, so
    a failure on one does not prevent restoring the other. Neither
    download depends on the file already existing locally, because the
    point of the pull is to restore files a fresh container does not have.
    Download errors are printed and swallowed.
    """
    if not HF_TOKEN or not DATASET_ID:
        return

    print(f"[Monitor] Pulling history from {DATASET_ID}...")
    restored = []
    for filename in (CSV_FILE, RESULTS_FILE):
        try:
            hf_hub_download(
                repo_id=DATASET_ID,
                filename=filename,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            # Includes "file not in the repo yet" as well as network/auth
            # failures; show the reason instead of hiding it.
            print(f"[Monitor] Could not restore {filename}: {e}")
        else:
            restored.append(filename)

    if restored:
        print("[Monitor] History successfully restored from cloud!")
    else:
        print("[Monitor] No cloud history found, starting fresh.")

# Flask application that serves the status page ('/'), the CSV download
# ('/download') and the debug screenshot ('/debug').
app = Flask(__name__)

@app.route('/')
def index():
    """Return the static HTML status page with download and debug links."""
    # The two emoji in the heading are written as numeric character
    # references (brain, alien) so they render correctly regardless of how
    # this source file is encoded or transcoded.
    return '''
    <html>
        <head><title>Professor's Cloud Monitor</title></head>
        <body style="font-family: sans-serif; text-align: center; padding: 50px; background: #121212; color: white;">
            <h1>&#129504; Virtual Football Cloud Monitor &#128125;</h1>
            <p>Status: Running 24/7</p>
            <br>
            <a href="/download" style="background: #00ff00; color: black; padding: 15px 30px; text-decoration: none; border-radius: 5px; font-weight: bold;">
                DOWNLOAD MATCH_HISTORY.CSV
            </a>
            <p style="margin-top: 50px; color: #888;">Note: Download your data daily. Free Spaces reset periodically!</p>
            <br>
            <a href="/debug" style="background: #ff6600; color: white; padding: 10px 20px; text-decoration: none; border-radius: 5px;">
                VIEW DEBUG SCREENSHOT
            </a>
        </body>
    </html>
    '''

@app.route('/download')
def download():
    """Send the match-history CSV as an attachment, or a notice if absent."""
    # Resolve against the working directory once and use the same absolute
    # path for both the existence check and send_file(); Flask resolves a
    # relative send_file() path against the application root instead.
    csv_path = os.path.abspath(CSV_FILE)
    if os.path.exists(csv_path):
        return send_file(csv_path, as_attachment=True)
    return "No data collected yet!"

@app.route('/debug')
def debug():
    """Send the most recently written debug_*.png from the working directory."""
    # Absolute paths keep the listing and send_file() pointed at the same
    # directory (Flask resolves relative send_file() paths against the
    # application root, not the working directory).
    screenshots = [
        os.path.abspath(name)
        for name in os.listdir('.')
        if name.startswith('debug_') and name.endswith('.png')
    ]
    if not screenshots:
        return "No debug screenshots yet. Wait for cycle 1."
    # Pick by modification time: the file names embed un-padded cycle
    # numbers and two different prefixes, so a name sort is not chronological.
    latest = max(screenshots, key=os.path.getmtime)
    return send_file(latest, mimetype='image/png')


# Plain-text log; save_result() appends one "<home> <h>:<a> <away>" line per match.
RESULTS_FILE = "england_virtual_results.txt"
# CSV history written by save_result() (timestamp, teams, scores, 1X2 odds).
CSV_FILE = "match_history.csv"
# Seconds slept between monitoring cycles.
CHECK_INTERVAL = 15  # Check every 15s to catch the 84-90min window
# Page that run_monitor() loads at the start of every cycle.
URL = "https://www.sportybet.com/ng/sport/vFootball/sv:category:202120002/sv:league:2"


def save_result(home_team, away_team, home_score, away_score, h_odds='', d_odds='', a_odds=''):
    """Append one match result to RESULTS_FILE and CSV_FILE.

    Args:
        home_team: Home team name.
        away_team: Away team name.
        home_score: Home score as scraped (written verbatim).
        away_score: Away score as scraped (written verbatim).
        h_odds: Home-win odds text, '' when unavailable.
        d_odds: Draw odds text, '' when unavailable.
        a_odds: Away-win odds text, '' when unavailable.

    The CSV row is stamped with the local time of the call. The header row
    is written whenever the CSV is missing or empty, so the file always
    starts with the column names that the history loader reads by key.
    """
    with open(RESULTS_FILE, 'a', encoding='utf-8') as f:
        f.write(f"{home_team} {home_score}:{away_score} {away_team}\n")

    # An existing but zero-byte file still needs the header.
    needs_header = not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['timestamp', 'home_team', 'away_team', 'home_score', 'away_score',
                             'h_odds', 'd_odds', 'a_odds'])
        writer.writerow([
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            home_team, away_team, home_score, away_score,
            h_odds, d_odds, a_odds
        ])
    print(f"  [SAVED] {home_team} {home_score}:{away_score} {away_team} | Odds: H={h_odds} D={d_odds} A={a_odds}")


# JavaScript snippets shared by the click helpers below.
_CLICK_JS = "el => el.click()"
# getAttribute('class') is always a string or null; el.className is an
# object (not a string) on SVG elements.
_CLASS_JS = "el => el.getAttribute('class') || ''"
# True when a direct child already contains both the label and a digit,
# i.e. the element is only a container around the real target.
_CHILD_ALSO_MATCHES_JS = """el => Array.from(el.children).some(child => {
    const t = child.innerText || '';
    return t.includes('Live Betting') && /[0-9]/.test(t);
})"""

_MATCH_ROW_SELECTOR = "div.m-table-row.m-content-row.match-row.vFootball-row"


def _load_saved_matches():
    """Return the set of match IDs already recorded in CSV_FILE.

    An ID is "<home>_<away>_<YYYYMMDDHH>", built from the hour of the row's
    stored timestamp. Rows with a missing column or an unparsable timestamp
    are skipped. Returns an empty set when the CSV does not exist.
    """
    saved = set()
    if not os.path.exists(CSV_FILE):
        return saved
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            try:
                stamp = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
                home, away = row['home_team'], row['away_team']
            except (KeyError, TypeError, ValueError):
                # KeyError: column absent; TypeError: short row (value is
                # None); ValueError: timestamp in another format.
                continue
            saved.add(f"{home}_{away}_{stamp.strftime('%Y%m%d%H')}")
    print(f"[Monitor] {len(saved)} unique hourly matches in history")
    return saved


async def _click_live_betting(page):
    """Click the in-page "Live Betting" toggle; return True on success.

    The page has two "Live Betting" elements: a top nav link that navigates
    away (wrong) and an in-page toggle that shows live matches (right).
    Five strategies are tried in order, each one best-effort: any error in
    a strategy simply falls through to the next.
    """
    # Method 1: a div with role='text leaf' whose text contains the label.
    # All such divs are inspected, not just the first one on the page.
    try:
        for el in await page.query_selector_all("div[role='text leaf']"):
            if 'Live Betting' in await el.inner_text():
                await el.evaluate(_CLICK_JS)
                print("  Clicked Live Betting (method 1: role text leaf)")
                return True
    except Exception:
        pass

    # Methods 2 and 3: a span inside div.league-title, found by its
    # data-cms-key attribute or by its text.
    scoped_selectors = (
        ("div.league-title span[data-cms-key='live_betting']",
         "  Clicked Live Betting (method 2: league-title cms-key)"),
        ("div.league-title span:has-text('Live Betting')",
         "  Clicked Live Betting (method 3: league-title span text)"),
    )
    for selector, message in scoped_selectors:
        try:
            el = await page.query_selector(selector)
            if el:
                await el.evaluate(_CLICK_JS)
                print(message)
                return True
        except Exception:
            pass

    # Method 4: the element whose text is "Live Betting" plus a number
    # (e.g. "Live Betting 39"); the top nav entry has no number.
    try:
        for el in await page.query_selector_all("*:has-text('Live Betting')"):
            text = (await el.inner_text()).strip()
            tag = await el.evaluate("el => el.tagName")
            if 'Live Betting' not in text or not any(c.isdigit() for c in text) or tag == 'NAV':
                continue
            own_class = (await el.evaluate(_CLASS_JS)).lower()
            if 'nav' in own_class or 'header' in own_class:
                continue
            # A bare :has-text() also matches every ancestor (<html>,
            # <body>, wrappers). Skip containers so the click lands on the
            # innermost matching element rather than the document root.
            if await el.evaluate(_CHILD_ALSO_MATCHES_JS):
                continue
            await el.evaluate(_CLICK_JS)
            print(f"  Clicked Live Betting (method 4: element with number '{text[:30]}')")
            return True
    except Exception:
        pass

    # Method 5: walk the DOM in JavaScript and click the first element that
    # contains the label and looks like the league-title / league-tab toggle.
    try:
        clicked = await page.evaluate("""() => {
            const walker = document.createTreeWalker(
                document.body, NodeFilter.SHOW_ELEMENT
            );
            while (walker.nextNode()) {
                const el = walker.currentNode;
                const text = el.textContent || '';
                const cls = el.getAttribute('class') || '';
                if (text.includes('Live Betting') &&
                    (cls.includes('league-title') || el.getAttribute('role') === 'text leaf' ||
                     cls.includes('league-tab'))) {
                    el.click();
                    return 'clicked: ' + cls;
                }
            }
            return null;
        }""")
        if clicked:
            print(f"  Clicked Live Betting (method 5: JS walker - {clicked})")
            return True
    except Exception:
        pass

    return False


async def _wait_for_england(page, cycle, max_attempts=10):
    """Poll for the 'England Virtual' league header; return True if found.

    Checks up to max_attempts times, sleeping 10 seconds after each miss.
    On the first miss of cycles 1-5 a debug screenshot is saved and the
    first 400 characters of the page text are printed.
    """
    for attempt in range(max_attempts):
        try:
            header = await page.query_selector(
                "div.m-table-cell.league:has-text('England Virtual')"
            )
            if header:
                print(f"  Found 'England Virtual' header! (attempt {attempt + 1})")
                return True
        except Exception:
            pass

        if attempt == 0 and cycle <= 5:
            await page.screenshot(path=f"debug_cycle{cycle}_attempt{attempt}.png")
            body = await page.inner_text("body")
            preview = body[:400].replace('\n', ' | ')
            print(f"  [DEBUG] Content: {preview}")

        print(f"  England Virtual not found yet (attempt {attempt + 1}/{max_attempts}), waiting 10s...")
        await asyncio.sleep(10)
    return False


async def _collect_england_rows(page):
    """Return (england_rows, total_row_count) for the current page.

    The page lists matches from several leagues. A row counts as England
    Virtual when its top edge lies below the 'England Virtual' header and
    above the next league header (or anywhere below it when there is no
    next header).
    """
    indices = await page.evaluate("""() => {
        const headers = document.querySelectorAll('div.m-table-cell.league');
        const allRows = document.querySelectorAll('div.m-table-row.m-content-row.match-row.vFootball-row');

        // Locate the England Virtual header and the first header after it.
        let englandHeader = null;
        let nextHeader = null;
        let foundEngland = false;

        for (const h of headers) {
            if (h.textContent.includes('England Virtual')) {
                englandHeader = h;
                foundEngland = true;
            } else if (foundEngland && !nextHeader) {
                nextHeader = h;
            }
        }

        if (!englandHeader) return [];

        const englandTop = englandHeader.getBoundingClientRect().top;
        const nextTop = nextHeader ? nextHeader.getBoundingClientRect().top : 999999;

        // Keep the indices of rows positioned between the two headers.
        const indices = [];
        const rows = Array.from(allRows);
        for (let i = 0; i < rows.length; i++) {
            const rowTop = rows[i].getBoundingClientRect().top;
            if (rowTop > englandTop && rowTop < nextTop) {
                indices.push(i);
            }
        }
        return indices;
    }""")

    # Re-query from Python to get element handles, then pick by index. The
    # bounds check covers rows disappearing between the two queries.
    all_rows = await page.query_selector_all(_MATCH_ROW_SELECTOR)
    england_rows = [all_rows[i] for i in indices if i < len(all_rows)]
    return england_rows, len(all_rows)


async def _harvest_results(england_rows, saved_matches):
    """Save every row at minute 84+ not yet in saved_matches; return the count.

    saved_matches is updated in place. Rows lacking a clock, team names or
    exactly two score items are skipped, as is any row that raises while
    being read.
    """
    saved_count = 0
    for row in england_rows:
        try:
            time_el = await row.query_selector("div.clock-time")
            if not time_el:
                continue
            match_time = (await time_el.inner_text()).strip()
            if not match_time or ':' not in match_time:
                continue
            current_minute = int(match_time.split(':')[0])

            home_el = await row.query_selector("div.home-team")
            away_el = await row.query_selector("div.away-team")
            if not home_el or not away_el:
                continue
            home_team = (await home_el.inner_text()).strip()
            away_team = (await away_el.inner_text()).strip()

            # The current hour is part of the ID so the same fixture can be
            # saved again in a later round.
            # NOTE(review): a match still shown at minute 84+ when the
            # wall-clock hour rolls over gets a new ID and is saved again.
            hour_id = datetime.now().strftime('%Y%m%d%H')
            match_id = f"{home_team}_{away_team}_{hour_id}"

            score_items = await row.query_selector_all("div.score-item")
            if len(score_items) != 2:
                continue
            home_score = (await score_items[0].inner_text()).strip()
            away_score = (await score_items[1].inner_text()).strip()

            # 1X2 odds are optional; leave them blank if they cannot be read.
            h_odds = d_odds = a_odds = ''
            try:
                odds_els = await row.query_selector_all("span.m-outcome-odds")
                if len(odds_els) >= 3:
                    h_odds = (await odds_els[0].inner_text()).strip()
                    d_odds = (await odds_els[1].inner_text()).strip()
                    a_odds = (await odds_els[2].inner_text()).strip()
            except Exception:
                pass

            if current_minute >= 84 and match_id not in saved_matches:
                save_result(home_team, away_team, home_score, away_score,
                            h_odds, d_odds, a_odds)
                saved_matches.add(match_id)
                saved_count += 1
            elif current_minute < 84:
                print(f"  {home_team} {home_score}:{away_score} {away_team} @ {match_time} (waiting...)")
        except Exception:
            # One unreadable row must not stop the rest of the harvest.
            continue
    return saved_count


async def run_monitor():
    """Run the endless monitoring loop.

    Startup: restore history from the cloud dataset (if configured), load
    the IDs of already-saved matches from CSV_FILE, and launch a headless
    Chromium page.

    Each cycle: load URL, click the in-page "Live Betting" toggle, wait for
    the 'England Virtual' header, collect that league's rows, and save the
    result of every match at minute 84 or later that has not been saved
    yet. New results trigger a cloud sync. A cycle that completes without
    bailing out early also syncs when its number is a multiple of 20, and
    then sleeps CHECK_INTERVAL seconds. Any exception inside a cycle is
    printed and the loop continues.
    """
    # Pull latest data from the Hugging Face dataset if configured.
    pull_from_cloud()

    saved_matches = _load_saved_matches()

    async with async_playwright() as p:
        print("[Monitor] Launching headless browser...")
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )
        page = await context.new_page()

        print("[Monitor] Starting 24/7 monitoring loop...")
        print(f"[Monitor] Checking every {CHECK_INTERVAL} seconds")
        print(f"[Monitor] Saving to: {RESULTS_FILE} + {CSV_FILE}\n")

        cycle = 0
        while True:
            cycle += 1
            try:
                print(f"\n--- Cycle {cycle} | {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---")

                # STEP 1: navigate to the vFootball page. A load timeout is
                # tolerated; the page may still be usable.
                try:
                    await page.goto(URL, wait_until='domcontentloaded', timeout=45000)
                    await asyncio.sleep(5)
                except Exception:
                    print("  Page load timed out, trying to continue...")

                # STEP 2: switch the league view from scheduled to live.
                if not await _click_live_betting(page):
                    print("  WARNING: Could not find in-page Live Betting toggle!")
                    # Keep a screenshot of the failure for the first cycles.
                    if cycle <= 10:
                        await page.screenshot(path=f"debug_livebtn_fail_{cycle}.png")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue

                await asyncio.sleep(5)

                # No need to click vFootball or England: the URL already
                # targets the league and the toggle only switches to live.

                # STEP 3: wait for the England Virtual section to appear.
                if not await _wait_for_england(page, cycle):
                    print("  No England Virtual matches found this cycle")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue

                # STEP 4: keep only the England Virtual match rows.
                england_rows, total_rows = await _collect_england_rows(page)
                print(f"  Found {len(england_rows)} England Virtual match rows (filtered from {total_rows} total)")

                if len(england_rows) < 10:
                    print("  Less than 10 England matches, waiting...")
                    await asyncio.sleep(10)
                    continue

                # STEP 5: save finished (minute 84+) matches.
                matches_saved_this_cycle = await _harvest_results(england_rows, saved_matches)
                if matches_saved_this_cycle > 0:
                    print(f"  Saved {matches_saved_this_cycle} new results this cycle")
                    # Sync to cloud after every successful harvest.
                    sync_to_cloud()

            except Exception as e:
                print(f"  Error in cycle {cycle}: {e}")

            # Periodic backup sync every 20th cycle.
            if cycle % 20 == 0:
                sync_to_cloud()

            print(f"  Next check in {CHECK_INTERVAL}s...")
            await asyncio.sleep(CHECK_INTERVAL)


if __name__ == '__main__':
    banner = "=" * 50
    print(banner)
    print("VIRTUAL FOOTBALL CLOUD MONITOR")
    print("Lightweight Playwright-based harvester")
    print(banner)

    # Serve the Flask app from a daemon thread (port 7860 for HuggingFace)
    # so it shares the process with the monitor and exits along with it.
    web_thread = threading.Thread(
        target=app.run,
        kwargs={'host': '0.0.0.0', 'port': 7860},
        daemon=True,
    )
    web_thread.start()

    # The monitor loop owns the main thread and never returns normally.
    asyncio.run(run_monitor())