Alpha108 committed on
Commit
fd7d2b3
·
verified ·
1 Parent(s): b7e5927

Update src/data_sources.py

Browse files
Files changed (1) hide show
  1. src/data_sources.py +30 -60
src/data_sources.py CHANGED
@@ -1,64 +1,34 @@
1
- import cbpy as cb
2
- import pandas as pd
3
- from typing import Dict, Any
4
 
5
- def list_live_matches() -> pd.DataFrame:
6
- """Return minimal live match list (id, title)."""
7
- try:
8
- matches = cb.Cricbuzz().matches() # pulls cricbuzz matches.json
9
- rows = []
10
- for m in matches.get("matches", []):
11
- if m.get("mchstate") in {"preview","inprogress","innings break","toss"}:
12
- rows.append({
13
- "match_id": m.get("id"),
14
- "title": f'{m.get("srs", "")} - {m.get("mnum", "")} | {m.get("team1", {}).get("name")} vs {m.get("team2", {}).get("name")}',
15
- "state": m.get("mchstate"),
16
- })
17
- return pd.DataFrame(rows)
18
- except Exception:
19
- return pd.DataFrame([])
20
 
21
- def scorecard(match_id: str) -> Dict[str, Any]:
22
- """Fetch a compact scorecard/summary for a match id."""
23
- c = cb.Cricbuzz()
24
- try:
25
- sc = c.scorecard(match_id) # team info, batsmen, bowlers, overs
26
- return sc
27
- except Exception:
28
- return {}
29
 
30
- def extract_simple_context(sc: Dict[str, Any]) -> Dict[str, Any]:
31
- """Minimal, robust context from scorecard response."""
32
- if not sc or "scorecard" not in sc:
33
- return {}
34
- # Grab the latest innings block
35
- inn = sc["scorecard"][-1]
36
- return {
37
- "batting_team": inn.get("batteam", {}).get("name"),
38
- "bowling_team": inn.get("bowlteam", {}).get("name"),
39
- "score": inn.get("runs"),
40
- "wickets": inn.get("wickets"),
41
- "overs": inn.get("overs"),
42
- "rr": inn.get("rr"),
43
- "crr": inn.get("crr"),
44
- "players": {
45
- "batsmen": [b.get("name") for b in inn.get("batcard", [])],
46
- "bowlers": [b.get("name") for b in inn.get("bowlcard", [])],
47
- }
48
- }
49
 
50
- def striker_form_last_overs(sc: Dict[str, Any], striker_name: str) -> Dict[str, Any]:
51
- """Very light ‘recent pattern’ proxy using scorecard strike rates and runs."""
52
- if not sc or "scorecard" not in sc:
53
- return {}
54
- inn = sc["scorecard"][-1]
55
- bats = {b.get("name"): b for b in inn.get("batcard", [])}
56
- s = bats.get(striker_name, {})
57
- return {
58
- "runs": s.get("runs"),
59
- "balls": s.get("balls"),
60
- "sr": s.get("sr"),
61
- "fours": s.get("fours"),
62
- "sixes": s.get("sixers") or s.get("six", None),
63
- "note": "Approximation from live card; ball-by-ball upgrades later."
64
- }
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
 
3
 
4
+ # Example: scrape live matches (ESPNCricinfo or Cricbuzz)
5
+ def list_live_matches():
6
+ url = "https://www.espncricinfo.com/live-cricket-match-results"
7
+ response = requests.get(url)
8
+ soup = BeautifulSoup(response.text, "html.parser")
 
 
 
 
 
 
 
 
 
 
9
 
10
+ matches = []
11
+ for item in soup.select(".ds-px-4.ds-py-3"):
12
+ title = item.get_text(" ", strip=True)
13
+ matches.append(title)
 
 
 
 
14
 
15
+ return matches
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+
18
+ def scorecard(match_url: str):
19
+ response = requests.get(match_url)
20
+ soup = BeautifulSoup(response.text, "html.parser")
21
+
22
+ # (MVP: extract only scores; later we can refine for player stats)
23
+ score = soup.select_one(".ds-text-compact-xxs.ds-font-medium")
24
+ return score.get_text(strip=True) if score else "No score found"
25
+
26
+
27
+ def extract_simple_context(match_url: str):
28
+ # For MVP: return score only
29
+ return scorecard(match_url)
30
+
31
+
32
+ def striker_form_last_overs(player_name: str):
33
+ # Placeholder – later we’ll integrate CricAPI or Stats API
34
+ return f"Recent form data for {player_name} (to be implemented)"