from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import re
import requests
from bs4 import BeautifulSoup
import tempfile
import os
from playwright.sync_api import sync_playwright
from datetime import datetime
import pytz

os.environ["SE_CACHE_PATH"] = "/tmp/selenium"

app = Flask(__name__)
CORS(app)

# Browser-like headers shared by every TransSee request
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Referer': 'https://www.transsee.ca/',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',  # Do Not Track
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}


@app.route('/routelist', methods=['GET'])
def getRouteList():
    url = "https://www.transsee.ca/routelist?a=ttc&ShowAll=1"
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch route list: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")
    routes = {}
    id_ranges = [
        range(7, 204),    # all conventional routes
        range(300, 399),  # all Blue Night routes
        range(500, 599),  # all streetcar routes
        range(900, 999),  # all express routes
    ]
    for id_range in id_ranges:
        for rid in id_range:
            tag = soup.find("p", {"id": str(rid)})
            if not tag or not tag.text.strip():
                continue
            # Clean text:
            text = tag.get_text(strip=True)
            text = re.sub(r"^[^A-Za-z0-9]+", "", text)  # strip leading symbols like "*"
            text = re.sub(r"\s*==.*$", "", text)        # strip trailing "== $0" or similar
            text = re.sub(r"\s+", " ", text).strip()    # collapse spaces
            # Replace dash between number and name with space (e.g., "26-Dupont" -> "26 Dupont")
            text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)
            routes[rid] = text

    return jsonify(routes)


@app.route('/listvehiclesbyroute', methods=['POST'])
def listVehiclesByRoute():
    data = request.get_json()
    if not data or "route" not in data:
        return jsonify({"error": "Missing route parameter"}), 400
    route = data["route"]
    url = f"https://www.transsee.ca/routeveh?a=ttc&r={route}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch vehicle list: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")
    core_div = soup.find("div", class_="core")
    if not core_div:
        return jsonify({"error": "No vehicles found"}), 404

    vehicles = []
    all_popups = soup.find_all("div", class_="leaflet-popup-content")
    for p in core_div.find_all("p", id=True):
        vehicle_id = p.find("a").get_text(strip=True) if p.find("a") else None

        spans = p.find_all("span")
        delay = None
        if spans:
            first_text = spans[0].get_text(strip=True).lower()
            if "waiting" in first_text and len(spans) > 1:
                delay = spans[1].get_text(strip=True)
            else:
                delay = spans[0].get_text(strip=True)

        time = p.find("time").get_text(strip=True) if p.find("time") else None

        direction = None
        destination = None
        for part in p.stripped_strings:
            if part.startswith("going"):
                direction = part
            elif "to " in part:  # destination line
                destination = part
        #print(f"{direction} {destination}")

        if delay is None:
            continue
        late = delay.split(' ')[-1].lower() == 'behind'
        delay_value = delay.split(' ')[0]
        # Convert delay from hh:mm:ss to mm:ss format
        if ":" in delay_value:
            parts = delay_value.split(":")
            if len(parts) == 3:  # hh:mm:ss format
                try:
                    h, m, s = parts
                    # Convert to mm:ss by adding hours to minutes
                    total_minutes = int(h) * 60 + int(m)
                    delay_value = f"{total_minutes}:{s}"
                except ValueError:
                    pass
            # If it's mm:ss format, keep as is
        delay = ("-" if late else "+") + delay_value

        # Extract coordinates from JavaScript AddMarker calls
        lat, lon = None, None
        scripts = soup.find_all("script")
        for script in scripts:
            if script.string and "AddMarker" in script.string:
                # Split the script content by AddMarker calls and find the one with our vehicle ID
                add_marker_calls = script.string.split("AddMarker(")
                for call in add_marker_calls[1:]:  # skip the split before the first AddMarker
                    if f'"{vehicle_id}"' in call:
                        # Extract coordinates from the first argument [lat, lon]
                        coords_match = re.search(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]', call)
                        if coords_match:
                            lat, lon = float(coords_match.group(1)), float(coords_match.group(2))
                            print(f"Found coordinates for vehicle {vehicle_id}: lat={lat}, lon={lon}")
                            break
            if lat is not None:
                break

        #if direction is None or destination is None: continue
        if direction is not None:
            direction_parts = direction.split(' ')
            branch = direction_parts[-1] if len(direction_parts) > 1 else ""
        else:
            branch = None

        vehicles.append({
            "vehicle_id": vehicle_id,
            "delay": delay,
            "time": time,
            "branch": branch,
            "destination": destination,
            "latitude": lat,
            "longitude": lon
        })

    return jsonify({
        "route": route,
        "vehicles": vehicles
    })


@app.route("/alerts", methods=["GET"])
def serviceAlerts():
    url = "https://www.transsee.ca/showmessages?a=ttc"
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch service alerts: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")
    alerts = []
    for section in soup.select("section.HighAlert"):
        title = section.find("b").get_text(strip=True) if section.find("b") else ""
        route = section.find("div").get_text(strip=True) if section.find("div") else ""
        alerts.append({
            "title": title,
            "route": route
        })
    return jsonify(alerts)


def getVehicleInfo(vehicle_id):
    url = f'https://www.transsee.ca/fleetfind?a=ttc&findtrack=1&q={vehicle_id}&Go=Go'
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except Exception as e:
        # This helper is unpacked into (direction, destination) by the caller,
        # so signal failure with Nones instead of a Flask response
        print(f"Failed to fetch vehicle info: {e}")
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')
    # Find the specific paragraph with id=vehicle_id
    p = soup.find("p", id=vehicle_id)
    if not p:
        print(f"Vehicle {vehicle_id} not found")
        return None, None

    # Get the HTML content to properly parse the structure
    html_content = str(p)

    # Extract direction (like "going D")
    direction = None
    direction_match = re.search(r'going\s+([A-Za-z0-9]+)', html_content)
    if direction_match:
        direction = direction_match.group(1)

    # Extract destination - only the text before <br>
    destination = None
    # Split by <br> and get only the first part
    before_br = html_content.split('<br>')[0].split('<br/>')[0]
    # Look for the quoted destination text that contains direction words
    dest_match = re.search(r'"([^"]*(?:South|North|East|West)[^"]*)"', before_br)
    if dest_match:
        raw_dest = dest_match.group(1).strip()
        # Clean up - remove anything after "at " or "on " which indicates location details
        clean_dest = re.sub(r'at\s+.*$', '', raw_dest)
        destination = clean_dest.strip()
    else:
        # Fallback: extract text content and look for direction patterns
        temp_soup = BeautifulSoup(before_br, 'html.parser')
        text_content = temp_soup.get_text()
        dest_match = re.search(r'((?:South|North|East|West) to [^=]*?)(?=\s*==|\s*$)', text_content)
        if dest_match:
            raw_dest = dest_match.group(1).strip()
            clean_dest = re.sub(r'at\s+.*$', '', raw_dest)
            destination = clean_dest.strip()

    return direction, destination


@app.route('/seek', methods=['POST'])
def seek():
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    stop_id = data.get('stop')
    if not stop_id:
        return jsonify({'error': 'Stop ID is required'}), 400

    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"

    # Use Playwright to handle redirects and wait for the page to load
    with sync_playwright() as p:
        # Find where Playwright actually installed Chrome
        import glob

        # Try different possible locations
        possible_locations = [
            "/root/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
            "/home/user/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
            "/tmp/playwright-browsers/chromium-*/chrome-linux/chrome",
            "/usr/lib/chromium-browser/chromium-browser",
            "/usr/bin/chromium-browser"
        ]
        executable_path = None
        for pattern in possible_locations:
            if "*" in pattern:
                paths = glob.glob(pattern)
                if paths:
                    executable_path = paths[0]
                    print(f"Found Chrome at: {executable_path}")
                    break
            else:
                if os.path.exists(pattern):
                    executable_path = pattern
                    print(f"Found Chrome at: {executable_path}")
                    break

        if not executable_path:
            # Debug: show what's actually there
            print("Chrome not found. Checking directories:")
            for check_dir in ["/root/.cache", "/home", "/tmp", "/usr/bin"]:
                if os.path.exists(check_dir):
                    print(f"Contents of {check_dir}:")
                    os.system(f"find {check_dir} -name '*chrome*' -o -name '*chromium*' 2>/dev/null | head -10")
            # Try to let Playwright find it automatically
            try:
                browser = p.chromium.launch(headless=True)
                print("Playwright found Chrome automatically")
            except Exception as e:
                raise Exception(f"Chrome not found anywhere and Playwright can't find it: {e}")
        else:
            browser = p.chromium.launch(headless=True, executable_path=executable_path)
            print(f"Using Chrome executable: {executable_path}")

        page = browser.new_page()
        page.goto(url)
        # Wait for divp elements to load
        try:
            page.wait_for_selector("div.divp", timeout=15000)
        except Exception:
            pass
        html = page.content()
        browser.close()

    soup = BeautifulSoup(html, 'html.parser')

    # Extract routes from the <p> tags that contain <b> tags
    routes = []
    for p_tag in soup.find_all('p', id=re.compile(r'^\d+_\d+$')):
        b_tag = p_tag.find('b')
        if b_tag:
            route_link = b_tag.find('a', href=re.compile(r'stoplist\?a=ttc&r=\d+'))
            if route_link:
                route_text = route_link.get_text(strip=True)
                # Remove dash between number and name (e.g., "133-Neilson" -> "133 Neilson")
                route_text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', route_text)

                # Get the full text content of the p tag for branch and destination
                full_text = p_tag.get_text()

                # Extract branch (letter/number after "going" if it exists)
                branch = None
                branch_match = re.search(r'going\s+([A-Za-z0-9]+)', full_text)
                if branch_match:
                    branch_text = branch_match.group(1)
                    # Only treat single characters/numbers as branches (A, B, 1, 2, etc.)
                    if len(branch_text) == 1 and branch_text.isalnum():
                        branch = branch_text

                # Extract destination (everything after "to" until the end or next punctuation)
                destination = None
                destination_match = re.search(r'to\s+([^.]+)', full_text)
                if destination_match:
                    destination = destination_match.group(1).strip()

                routes.append({
                    'name': route_text,
                    'branch': branch,
                    'destination': destination
                })

    pattern = re.compile(r'^\d{1,3}_\d{3,5}_[1-4]$')
    vehicles = []
    for div_tag in soup.find_all('div', class_='divp', id=pattern):
        vehicle_id = div_tag.get('id')
        id_parts = vehicle_id.split('_')
        route = id_parts[0]  # Extract first 1-3 digits as route

        vehicle_data = {
            'route': route,
        }

        # Get both timedisp times
        timedisp_elements = div_tag.find_all('time', class_='timedisp')
        if len(timedisp_elements) >= 2:
            vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
            vehicle_data['scheduled'] = timedisp_elements[1].get_text(strip=True)
        elif len(timedisp_elements) == 1:
            vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
            vehicle_data['scheduled'] = None
        else:
            vehicle_data['actual'] = None
            vehicle_data['scheduled'] = None

        # If actual is null, replace with current time (bus is at stop)
        if vehicle_data['actual'] is None:
            # Get current time in EST/EDT timezone
            est_tz = pytz.timezone('America/New_York')
            current_time = datetime.now(est_tz)
            vehicle_data['actual'] = current_time.strftime("%I:%M:%S%p")

        # Get delay/ahead status (like "1:10 ahead")
        delay_span = None
        for span in div_tag.find_all('span', style=True):
            if 'color: light-dark' in span['style']:
                span_text = span.get_text(strip=True)
                if 'ahead' in span_text or 'behind' in span_text:
                    delay_span = span
                    break
        if delay_span:
            delay_text = delay_span.get_text(strip=True)
            if 'ahead' in delay_text:
                vehicle_data['delay'] = "+" + delay_text.replace(' ahead', '')
            elif 'behind' in delay_text:
                vehicle_data['delay'] = "-" + delay_text.replace(' behind', '')
            else:
                vehicle_data['delay'] = delay_text
        else:
            vehicle_data['delay'] = "0"

        # Get vehicle number from #MapMain link or plain text
        vehicle_link = div_tag.find('a', href="#MapMain")
        if vehicle_link:
            vehicle_data['vehicle_number'] = vehicle_link.get_text(strip=True)
        else:
            # Look for vehicle number in plain text like "Vehicle 1243 Load"
            text = div_tag.get_text()
            match = re.search(r'Vehicle\s+(\d+)', text)
            if match:
                vehicle_data['vehicle_number'] = match.group(1)
            else:
                vehicle_data['vehicle_number'] = None

        if vehicle_data['vehicle_number'] is None:
            continue

        vehicle_data['direction'], vehicle_data['destination'] = getVehicleInfo(vehicle_data['vehicle_number'])
        vehicles.append(vehicle_data)

    return jsonify({
        'stop': stop_id,
        'routes': routes,
        'vehicles': vehicles
    })


@app.route('/', methods=['GET'])
def health_check():
    return 'Backend is running!', 200


if __name__ == "__main__":
"__main__": port = int(os.environ.get("PORT", 7860)) app.run(host="0.0.0.0", port=port, debug=False)