DKethan committed on
Commit
42d29ab
Β·
verified Β·
1 Parent(s): aaa1814

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -118
app.py CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
 
5
- # --- IPL TEAMS ---
6
  IPL_TEAMS = [
7
  "Chennai Super Kings", "Delhi Capitals", "Gujarat Titans", "Kolkata Knight Riders",
8
  "Lucknow Super Giants", "Mumbai Indians", "Punjab Kings", "Rajasthan Royals",
@@ -11,152 +10,113 @@ IPL_TEAMS = [
11
 
12
  BASE_URL = "https://www.cricbuzz.com"
13
 
14
- # --- Detect Team ---
 
15
  def detect_team(user_input):
16
- print(f"🟒 [DETECT] Analyzing input: {user_input}")
17
  for team in IPL_TEAMS:
18
  if team.lower() in user_input.lower():
19
- print(f"βœ… [DETECT] Team found: {team}")
20
  return team
21
- print("❌ [DETECT] No IPL team detected in input.")
22
  return None
23
 
24
- # --- Crawl main matches page and extract links ---
25
- def get_match_links_from_main_page():
26
- try:
27
- url = f"{BASE_URL}/cricket-series/7607/indian-premier-league-2024/matches"
28
- print(f"\n🌐 [CRAWL] Fetching page: {url}")
29
- headers = {"User-Agent": "Mozilla/5.0"}
30
- response = requests.get(url, headers=headers)
31
- print(f"βœ… [CRAWL] Status code: {response.status_code}")
32
-
33
- soup = BeautifulSoup(response.text, "html.parser")
34
-
35
- all_a_tags = soup.find_all("a", href=True)
36
- print(f"πŸ” [CRAWL] Total <a> tags found: {len(all_a_tags)}")
37
 
38
- links = []
39
- for i in range(len(all_a_tags) - 1):
40
- tag = all_a_tags[i]
41
- next_tag = all_a_tags[i + 1]
42
-
43
- text = tag.get_text().strip()
44
- href = tag['href']
45
-
46
- # Looking for IPL match result + scorecard links
47
- if "cricket-scores" in href and any(team.lower() in text.lower() for team in IPL_TEAMS):
48
- title = text
49
- result = next_tag.get_text().strip()
50
- full_url = BASE_URL + href
51
- print(f"\n🎯 [MATCH FOUND]")
52
- print(f"🏷️ Title: {title}")
53
- print(f"πŸ“Š Result: {result}")
54
- print(f"πŸ”— Link: {full_url}")
55
-
56
- links.append((f"{title} – {result}", full_url))
57
-
58
- print(f"\nβœ… [DONE] Total match results extracted: {len(links)}")
59
- return links
60
-
61
- except Exception as e:
62
- print(f"❌ [CRAWL ERROR] Failed to fetch match links: {e}")
63
- return []
64
 
65
-
66
- # --- Crawl each link and extract score ---
67
- def get_scores_from_links(team_name=None):
68
- print("🟑 [STEP 1] Start crawling for scores...")
69
- links = get_match_links_from_main_page()
70
- print(f"βœ… [STEP 1] Links to process: {len(links)}")
71
-
72
- if not links:
73
- print("❌ [ERROR] No links found.")
74
- return ["No match links found."]
75
 
76
  results = []
 
 
 
 
 
77
 
78
- for i, (text, link) in enumerate(links):
79
- print(f"\nπŸ”— [PROCESS] Link {i+1}: {text} -> {link}")
80
 
81
- if team_name and team_name.lower() not in text.lower():
82
- print(f"⏭️ [SKIP] Team '{team_name}' not in link title.")
83
- continue
84
 
85
- try:
86
- print(f"🌐 [FETCH] Visiting {link}...")
87
- resp = requests.get(link, headers={"User-Agent": "Mozilla/5.0"})
88
- print(f"βœ… [FETCH] Status: {resp.status_code}")
 
 
 
89
 
90
- soup = BeautifulSoup(resp.text, "html.parser")
 
 
91
 
92
- title = soup.title.text.strip() if soup.title else "No Title Found"
93
- print(f"πŸ“Œ [TITLE] {title}")
94
 
95
- raw_text = soup.get_text(separator="\n").strip()
96
- print("🧾 [HTML TEXT] Preview:")
97
- print(raw_text[:500] + "\n...")
98
 
99
- summary_container = soup.find("div", class_="cb-col cb-col-100 cb-ltst-wgt-hdr")
100
- summary = summary_container.text.strip() if summary_container else text
101
- print(f"πŸ“‹ [SUMMARY] {summary[:200]}...")
102
 
103
- result = f"πŸ•“ {title}\n{summary}\n[View Match]({link})\n"
104
- results.append(result)
105
- print("βœ… [RESULT] Added βœ”οΈ")
106
 
107
- except Exception as e:
108
- print(f"❌ [ERROR] Failed to process link {link}: {e}")
109
 
110
- if not results:
111
- print("❌ [FINAL] No results matched filters.")
112
- return ["No scores found for the team."]
 
 
 
113
 
114
- print(f"βœ… [DONE] Returning {len(results)} results.")
115
- return results
 
116
 
117
- # --- Fantasy placeholder ---
118
- def get_fantasy_stats(team_name=None):
119
- print(f"[FANTASY] Placeholder for team: {team_name}")
120
- return [f"Fantasy stats for {team_name or 'your team'} are currently limited. Try Dream11 or official IPL fantasy apps."]
121
 
122
- # --- News placeholder ---
123
- def get_team_news(team_name=None):
124
- print(f"[NEWS] Placeholder for team: {team_name}")
125
- return [f"News for {team_name or 'IPL'} is coming soon! Stay tuned."]
126
 
127
- # --- Streamlit UI ---
128
  st.set_page_config(page_title="IPL Chatbot", layout="centered")
129
- st.title("🏏 IPL Chatbot with Crawl4AI-style Debugging")
130
-
131
- user_input = st.text_input("Ask me anything about IPL scores, teams, or fantasy:", placeholder="e.g. Show Mumbai Indians score")
132
 
133
  if user_input:
134
- print(f"\n========== USER INPUT RECEIVED ==========\n{user_input}\n=========================================")
135
- team = detect_team(user_input)
 
136
 
137
- if "score" in user_input.lower():
138
- st.subheader("πŸ“Š IPL Match Results")
139
- scores = get_scores_from_links(team)
140
- for score in scores:
141
- st.markdown(score)
142
-
143
- elif "fantasy" in user_input.lower():
144
- st.subheader("πŸ“ˆ Fantasy Info")
145
- for stat in get_fantasy_stats(team):
146
- st.write(stat)
147
-
148
- elif "news" in user_input.lower():
149
- st.subheader("πŸ“° News")
150
- for news in get_team_news(team):
151
- st.write(news)
152
 
 
 
 
 
 
153
  else:
154
- st.info("Try asking about 'scores', 'fantasy', or 'news' for a specific IPL team.")
 
 
 
155
 
156
- with st.expander("πŸ’‘ Try these examples"):
157
  st.markdown("""
158
- - Show me Mumbai Indians score
159
- - Fantasy info for RCB
160
- - News about CSK
161
- - What’s the latest from Gujarat Titans?
162
- """)
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
 
 
5
  IPL_TEAMS = [
6
  "Chennai Super Kings", "Delhi Capitals", "Gujarat Titans", "Kolkata Knight Riders",
7
  "Lucknow Super Giants", "Mumbai Indians", "Punjab Kings", "Rajasthan Royals",
 
10
 
11
  BASE_URL = "https://www.cricbuzz.com"
12
 
13
+ # Detect team
14
+
15
def detect_team(user_input):
    """Return the first IPL team whose name occurs in *user_input*.

    Matching is case-insensitive substring search over IPL_TEAMS, in list
    order. Returns the canonical team name, or None when nothing matches.
    """
    print(f"\n🟒 [DETECT] Input: {user_input}")
    haystack = user_input.lower()
    # First team (in declaration order) whose lowercased name is a substring.
    found = next((team for team in IPL_TEAMS if team.lower() in haystack), None)
    if found is not None:
        print(f"βœ… [DETECT] Found team: {found}")
        return found
    print("❌ [DETECT] No IPL team found")
    return None
23
 
24
+ # Get live matches from cricbuzz homepage
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def get_live_scores(team_name=None):
    """Scrape Cricbuzz's live-scores page for current match summaries.

    Parameters:
        team_name: optional team name; when given, only match blocks whose
            text contains it (case-insensitive) are kept.

    Returns:
        A list of at most 5 "🟒 LIVE: ..." summary strings; an empty list
        when no match is live, the filter excludes everything, or the page
        cannot be fetched. The empty-list return is what the UI uses to
        fall back to recent results.
    """
    print("\n🌐 [LIVE] Fetching live scores page...")
    url = "https://www.cricbuzz.com/cricket-match/live-scores"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Timeout so a hung request cannot freeze the Streamlit app; any
        # network/HTTP failure degrades gracefully to the recent-results path
        # (mirrors the try/except guard already used in get_recent_results).
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"❌ [LIVE ERROR] Could not fetch live scores: {e}")
        return []
    print(f"βœ… [LIVE] Response code: {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    # Cricbuzz wraps each live-match card in this class combination.
    matches = soup.find_all("div", class_="cb-col cb-col-100 cb-ltst-wgt-hdr")
    print(f"πŸ” [LIVE] Found {len(matches)} match blocks")

    results = []
    for block in matches:
        text = block.get_text(separator=" ").strip()
        if team_name is None or team_name.lower() in text.lower():
            results.append("🟒 LIVE: " + text)
            print(f"βœ… [LIVE MATCH] {text[:100]}...")

    # Cap the output so the chat stays readable.
    return results[:5]
 
45
 
46
+ # Get recent results if no live match
 
 
47
 
48
def get_recent_results(team_name=None):
    """Scrape the IPL 2024 series page for recent win/loss summaries.

    Cricbuzz's matches page lists a match-title <a> immediately followed by
    a result <a>, so adjacent anchor pairs are scanned. Results are split
    into wins ("won by" in the result text) and losses ("lost"), optionally
    filtered to *team_name*.

    Returns:
        Up to 5 win strings followed by up to 5 loss strings (markdown with
        a match link), or a one-element message list when nothing was found
        or the fetch failed.
    """
    try:
        url = f"{BASE_URL}/cricket-series/7607/indian-premier-league-2024/matches"
        print(f"\n🌐 [RECENT] Fetching matches from: {url}")
        headers = {"User-Agent": "Mozilla/5.0"}
        # Timeout keeps a slow/hung request from blocking the UI thread.
        response = requests.get(url, headers=headers, timeout=10)
        print(f"βœ… [RECENT] Status code: {response.status_code}")

        soup = BeautifulSoup(response.text, "html.parser")
        all_tags = soup.find_all("a", href=True)
        print(f"πŸ” [RECENT] Total <a> tags: {len(all_tags)}")

        won, lost = [], []
        # Hoist the per-iteration lowercasing out of the tag loop.
        teams_lower = [t.lower() for t in IPL_TEAMS]
        wanted = team_name.lower() if team_name else None

        # Pair each anchor with its successor: (title tag, result tag).
        for tag, next_tag in zip(all_tags, all_tags[1:]):
            href = tag['href']
            title = tag.get_text().strip()
            result = next_tag.get_text().strip()

            # Guard clauses: must be a scorecard link that names an IPL team.
            if "cricket-scores" not in href:
                continue
            title_lower = title.lower()
            if not any(t in title_lower for t in teams_lower):
                continue
            if wanted and wanted not in title_lower:
                continue

            full_url = BASE_URL + href
            full_text = f"{title} – {result}\n[View Match]({full_url})"

            if "won by" in result.lower():
                print(f"βœ… [WON] {title} => {result}")
                won.append(full_text)
            elif "lost" in result.lower():
                print(f"πŸŸ₯ [LOST] {title} => {result}")
                lost.append(full_text)

        top_results = won[:5] + lost[:5]
        print(f"πŸ“Š [RECENT] Total displayed: {len(top_results)}")
        return top_results if top_results else ["No recent match results found."]

    except Exception as e:
        # Broad catch is deliberate: scraping is best-effort and the UI
        # expects a displayable message rather than a traceback.
        print(f"❌ [ERROR] Failed recent match fetch: {e}")
        return ["Error loading past matches."]
 
91
 
92
+ # Streamlit UI
 
 
 
93
 
 
94
# --- Streamlit UI: one text box drives team detection + score lookup ---
st.set_page_config(page_title="IPL Chatbot", layout="centered")
st.title("🏏 IPL Chatbot")
user_input = st.text_input("Ask about IPL scores:", placeholder="e.g. Show me Mumbai Indians score")

if user_input:
    # Server-side trace of what the user asked.
    print("\n========== USER INPUT ==========")
    print(user_input)
    print("================================")

    team = detect_team(user_input)

    # Prefer live matches; fall back to recent results when none are on.
    live = get_live_scores(team)
    if live:
        st.subheader("🟒 Live Match(es):")
        for line in live:
            st.markdown(line)
    else:
        st.warning("❌ No live match found. Showing recent results:")
        for item in get_recent_results(team):
            st.markdown(item)

with st.expander("πŸ’‘ Try examples"):
    st.markdown("""
    - Show Mumbai Indians score
    - Delhi Capitals recent results
    - CSK won?
    - RCB vs MI today?
    """)