# Flask backend scraping TransSee (TTC): route list, live vehicles, stop
# predictions and service alerts.  (Removed stray "Spaces / Sleeping" text
# pasted in from the Hugging Face Spaces status page.)
| from flask import Flask, render_template, request, jsonify | |
| from flask_cors import CORS | |
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import tempfile | |
| import os | |
| from playwright.sync_api import sync_playwright | |
| from datetime import datetime | |
| import pytz | |
# Point Selenium's driver cache at a writable path — hosted containers often
# have a read-only home directory.  NOTE(review): Selenium itself is not
# imported in this file (Playwright is used instead); confirm this env var
# is still needed.
os.environ["SE_CACHE_PATH"] = "/tmp/selenium"
app = Flask(__name__)
# Enable CORS so a separately-hosted frontend can call this API.
CORS(app)
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from flask import jsonify | |
def _clean_route_text(text):
    """Normalise a scraped route label.

    Strips leading non-alphanumeric symbols (e.g. "*"), trailing debug
    artifacts like "== $0", collapses whitespace, and converts
    "26-Dupont" into "26 Dupont".
    """
    text = re.sub(r"^[^A-Za-z0-9]+", "", text)   # strip non-alphanumeric at start
    text = re.sub(r"\s*==.*$", "", text)         # strip trailing "== $0" or similar
    text = re.sub(r"\s+", " ", text).strip()     # collapse spaces
    # Replace dash between number and name (e.g. "26-Dupont" -> "26 Dupont").
    return re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)


def getRouteList():
    """Scrape TransSee's TTC route list and return {route_id: "NNN Name"} JSON.

    Covers conventional (7-203), Blue Night (300-398) and express (900-998)
    route-number ranges.  Returns a JSON error with HTTP 500 if the upstream
    fetch fails.
    """
    from itertools import chain

    url = "https://www.transsee.ca/routelist?a=ttc&ShowAll=1"
    # Browser-like headers so TransSee serves the normal page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch route list: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")
    routes = {}
    # One pass over all three route-number ranges; the per-route cleanup used
    # to be copy/pasted verbatim for each range.
    ranges = chain(
        range(7, 204),    # conventional routes
        range(300, 399),  # Blue Night routes
        range(900, 999),  # express routes
    )
    for rid in ranges:
        tag = soup.find("p", {"id": str(rid)})
        if tag and tag.text.strip():
            routes[rid] = _clean_route_text(tag.get_text(strip=True))
    return jsonify(routes)
def listVehiclesByRoute():
    """Return live vehicles for a route.  Expects POST JSON {"route": <id>}.

    Scrapes TransSee's routeveh page for each vehicle's delay, last-report
    time, branch and destination, and pulls coordinates out of the page's
    AddMarker(...) JavaScript calls.

    Fixes vs. previous version: vehicles with no delay <span> no longer
    crash (delay stays None), and the <script> tags are scanned once up
    front instead of once per vehicle.
    """
    data = request.get_json()
    if not data or "route" not in data:
        return jsonify({"error": "Missing route parameter"}), 400
    route = data["route"]

    url = f"https://www.transsee.ca/routeveh?a=ttc&r={route}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch route list: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")
    core_div = soup.find("div", class_="core")
    if not core_div:
        return jsonify({"error": "No vehicles found"}), 404

    # Index vehicle coordinates from the AddMarker("...") calls in one pass.
    # Any quoted token in a call may be the vehicle id; setdefault keeps the
    # first coordinates seen, matching the old first-matching-call scan.
    coords = {}
    for script in soup.find_all("script"):
        if not (script.string and "AddMarker" in script.string):
            continue
        for call in script.string.split("AddMarker(")[1:]:
            coords_match = re.search(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]', call)
            if not coords_match:
                continue
            latlon = (float(coords_match.group(1)), float(coords_match.group(2)))
            for quoted in re.findall(r'"([^"]*)"', call):
                coords.setdefault(quoted, latlon)

    vehicles = []
    for p in core_div.find_all("p", id=True):
        anchor = p.find("a")
        vehicle_id = anchor.get_text(strip=True) if anchor else None

        # Delay text: a leading "waiting" span pushes the real delay into the
        # next span.
        spans = p.find_all("span")
        delay = None
        if spans:
            first_text = spans[0].get_text(strip=True).lower()
            if "waiting" in first_text and len(spans) > 1:
                delay = spans[1].get_text(strip=True)
            else:
                delay = spans[0].get_text(strip=True)

        time_tag = p.find("time")
        time = time_tag.get_text(strip=True) if time_tag else None

        direction = None
        destination = None
        for part in p.stripped_strings:
            if part.startswith("going"):
                direction = part
            elif "to " in part:  # destination line
                destination = part

        if delay is not None:  # guard: previously crashed when no spans existed
            late = delay.split(' ')[-1].lower() == 'behind'
            delay_value = delay.split(' ')[0]
            # Normalise hh:mm:ss to mm:ss by folding hours into the minutes.
            if ":" in delay_value:
                parts = delay_value.split(":")
                if len(parts) == 3:
                    try:
                        h, m, s = parts
                        total_minutes = int(h) * 60 + int(m)
                        delay_value = f"{total_minutes}:{s}"
                    except ValueError:
                        pass  # non-numeric fields: keep the raw text
            delay = ("-" if late else "+") + delay_value

        lat, lon = coords.get(vehicle_id, (None, None))

        # Skip entries that aren't actual in-service vehicle rows.
        if direction is None or destination is None:
            continue

        direction_parts = direction.split(' ')
        # Branch letter is the last token of "going X ..." when present.
        branch = direction_parts[-1] if len(direction_parts) > 1 else ""
        vehicles.append({
            "vehicle_id": vehicle_id,
            "delay": delay,
            "time": time,
            "branch": branch,
            "destination": destination,
            "latitude": lat,
            "longitude": lon
        })

    return jsonify({
        "route": route,
        "vehicles": vehicles
    })
def serviceAlerts():
    """Scrape TransSee's TTC message board and return high-priority alerts.

    Each alert is a {"title", "route"} dict taken from the page's
    section.HighAlert elements; missing pieces become empty strings.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        resp = requests.get("https://www.transsee.ca/showmessages?a=ttc",
                            headers=headers, timeout=10)
        resp.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch service alerts: {e}"}), 500

    page = BeautifulSoup(resp.text, "html.parser")
    alerts = []
    for sec in page.select("section.HighAlert"):
        bold = sec.find("b")
        detail = sec.find("div")
        alerts.append({
            "title": bold.get_text(strip=True) if bold else "",
            "route": detail.get_text(strip=True) if detail else "",
        })
    return jsonify(alerts)
def getVehicleInfo(vehicle_id):
    """Look up one vehicle on TransSee's fleet finder.

    Internal helper (not a route handler): always returns a
    (direction, destination) tuple, where either element may be None.

    Bug fix: this used to return a Flask (Response, 500) pair on fetch
    errors / missing vehicles, which the caller then unpacked as if it were
    (direction, destination); all error paths now yield (None, None).
    """
    url = f'https://www.transsee.ca/fleetfind?a=ttc&findtrack=1&q={vehicle_id}&Go=Go'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception:
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')
    # The result paragraph carries the vehicle id as its element id.
    p = soup.find("p", id=vehicle_id)
    if not p:
        return None, None

    # Work on raw HTML so we can split on <br> boundaries.
    html_content = str(p)

    # Direction token, e.g. 'going D' -> 'D'.
    direction = None
    direction_match = re.search(r'going\s+([A-Za-z0-9]+)', html_content)
    if direction_match:
        direction = direction_match.group(1)

    # Destination: only consider text before the first <br>.
    destination = None
    before_br = html_content.split('<br/>')[0].split('<br>')[0]
    # Prefer a quoted destination containing a compass word.
    dest_match = re.search(r'"([^"]*(?:South|North|East|West)[^"]*)"', before_br)
    if dest_match:
        raw_dest = dest_match.group(1).strip()
        # Trim trailing location details introduced by "at ...".
        destination = re.sub(r'at\s+.*$', '', raw_dest).strip()
    else:
        # Fallback: strip tags and look for "<Compass> to ..." in plain text
        # (stopping before any trailing "==" debug artifact).
        temp_soup = BeautifulSoup(before_br, 'html.parser')
        text_content = temp_soup.get_text()
        dest_match = re.search(
            r'((?:South|North|East|West) to [^=]*?)(?=\s*==|\s*$)', text_content)
        if dest_match:
            raw_dest = dest_match.group(1).strip()
            destination = re.sub(r'at\s+.*$', '', raw_dest).strip()

    return direction, destination
def _find_chrome_executable():
    """Locate a Chromium/Chrome binary for Playwright.

    Checks the usual Playwright cache locations (glob patterns) plus common
    system paths; returns the first match, or None so the caller can fall
    back to Playwright's automatic browser discovery.
    """
    import glob
    possible_locations = [
        "/root/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
        "/home/user/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
        "/tmp/playwright-browsers/chromium-*/chrome-linux/chrome",
        "/usr/lib/chromium-browser/chromium-browser",
        "/usr/bin/chromium-browser"
    ]
    for pattern in possible_locations:
        if "*" in pattern:
            matches = glob.glob(pattern)
            if matches:
                print(f"Found Chrome at: {matches[0]}")
                return matches[0]
        elif os.path.exists(pattern):
            print(f"Found Chrome at: {pattern}")
            return pattern
    # Deployment debugging aid: show what browser binaries actually exist.
    print("Chrome not found. Checking directories:")
    for check_dir in ["/root/.cache", "/home", "/tmp", "/usr/bin"]:
        if os.path.exists(check_dir):
            print(f"Contents of {check_dir}:")
            os.system(f"find {check_dir} -name '*chrome*' -o -name '*chromium*' 2>/dev/null | head -10")
    return None


def seek():
    """Return arrival predictions for a TTC stop.

    Expects JSON {"stop": <stop id>}.  Renders TransSee's smsstop page with
    Playwright (the predictions are JS-generated, so plain requests won't
    see them), then scrapes:
      - routes serving the stop (name / branch / destination), and
      - per-vehicle predictions (actual vs scheduled time, delay, vehicle
        number, plus direction/destination via getVehicleInfo()).
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    stop_id = data.get('stop')
    if not stop_id:
        return jsonify({'error': 'Stop ID is required'}), 404

    # Fixed: was "url = url = f...", a harmless but confusing double assignment.
    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"

    # Use Playwright so redirects are followed and JS content gets rendered.
    with sync_playwright() as p:
        executable_path = _find_chrome_executable()
        if executable_path:
            browser = p.chromium.launch(headless=True, executable_path=executable_path)
            print(f"Using Chrome executable: {executable_path}")
        else:
            # Let Playwright find its own browser; fail loudly if it can't.
            try:
                browser = p.chromium.launch(headless=True)
                print("Playwright found Chrome automatically")
            except Exception as e:
                raise Exception(f"Chrome not found anywhere and Playwright can't find it: {e}")
        page = browser.new_page()
        page.goto(url)
        # Wait for the prediction divs; on timeout just parse what rendered.
        try:
            page.wait_for_selector("div.divp", timeout=15000)
        except Exception:
            pass
        html = page.content()
        browser.close()

    soup = BeautifulSoup(html, 'html.parser')

    # --- Routes serving this stop: <p id="route_stop"> containing a <b> ---
    routes = []
    for p_tag in soup.find_all('p', id=re.compile(r'^\d+_\d+$')):
        b_tag = p_tag.find('b')
        if not b_tag:
            continue
        route_link = b_tag.find('a', href=re.compile(r'stoplist\?a=ttc&r=\d+'))
        if not route_link:
            continue
        route_text = route_link.get_text(strip=True)
        # "133-Neilson" -> "133 Neilson"
        route_text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', route_text)
        full_text = p_tag.get_text()

        # Branch: a single alphanumeric character after "going", if present.
        branch = None
        branch_match = re.search(r'going\s+([A-Za-z0-9]+)', full_text)
        if branch_match:
            branch_text = branch_match.group(1)
            if len(branch_text) == 1 and branch_text.isalnum():
                branch = branch_text

        # Destination: everything after "to" up to the next period.
        destination = None
        destination_match = re.search(r'to\s+([^.]+)', full_text)
        if destination_match:
            destination = destination_match.group(1).strip()

        routes.append({
            'name': route_text,
            'branch': branch,
            'destination': destination
        })

    # --- Per-vehicle predictions: <div class="divp" id="route_stop_n"> ---
    pattern = re.compile(r'^\d{1,3}_\d{3,5}_[1-4]$')
    vehicles = []
    for div_tag in soup.find_all('div', class_='divp', id=pattern):
        vehicle_id = div_tag.get('id')
        id_parts = vehicle_id.split('_')
        route = id_parts[0]  # leading 1-3 digits are the route number
        vehicle_data = {
            'route': route,
        }

        # First timedisp is the predicted/actual time, second the schedule.
        timedisp_elements = div_tag.find_all('time', class_='timedisp')
        if len(timedisp_elements) >= 2:
            vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
            vehicle_data['scheduled'] = timedisp_elements[1].get_text(strip=True)
        elif len(timedisp_elements) == 1:
            vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
            vehicle_data['scheduled'] = None
        else:
            vehicle_data['actual'] = None
            vehicle_data['scheduled'] = None

        # No actual time means the bus is at the stop now; use the current
        # Toronto-local wall-clock time (America/New_York shares its offset).
        if vehicle_data['actual'] is None:
            est_tz = pytz.timezone('America/New_York')
            current_time = datetime.now(est_tz)
            vehicle_data['actual'] = current_time.strftime("%I:%M:%S%p")

        # Delay/ahead status, e.g. "1:10 ahead", styled with light-dark().
        delay_span = None
        for span in div_tag.find_all('span', style=True):
            if 'color: light-dark' in span['style']:
                span_text = span.get_text(strip=True)
                if 'ahead' in span_text or 'behind' in span_text:
                    delay_span = span
                    break
        if delay_span:
            delay_text = delay_span.get_text(strip=True)
            if 'ahead' in delay_text:
                vehicle_data['delay'] = "+" + delay_text.replace(' ahead', '')
            elif 'behind' in delay_text:
                vehicle_data['delay'] = "-" + delay_text.replace(' behind', '')
            else:
                vehicle_data['delay'] = delay_text
        else:
            vehicle_data['delay'] = "0"

        # Vehicle number: either the #MapMain link, or plain "Vehicle NNNN".
        vehicle_link = div_tag.find('a', href="#MapMain")
        if vehicle_link:
            vehicle_data['vehicle_number'] = vehicle_link.get_text(strip=True)
        else:
            text = div_tag.get_text()
            match = re.search(r'Vehicle\s+(\d+)', text)
            vehicle_data['vehicle_number'] = match.group(1) if match else None

        # Without a vehicle number we can't enrich the record; skip it.
        if vehicle_data['vehicle_number'] is None:
            continue
        vehicle_data['direction'], vehicle_data['destination'] = \
            getVehicleInfo(vehicle_data['vehicle_number'])
        vehicles.append(vehicle_data)

    return jsonify({
        'stop': stop_id,
        'routes': routes,
        'vehicles': vehicles
    })
def health_check():
    """Liveness endpoint: report that the backend process is up."""
    body, status = 'Backend is running!', 200
    return body, status
if __name__ == "__main__":
    # Hosting platforms inject PORT; 7860 is the Hugging Face Spaces default.
    port = int(os.environ.get("PORT", 7860))
    # Bind to all interfaces so the container's mapped port is reachable.
    app.run(host="0.0.0.0", port=port, debug=False)