"""Flask backend that scrapes TransSee pages for TTC routes, vehicles and alerts."""

import os
import re
import tempfile

import requests
from bs4 import BeautifulSoup
from flask import Flask, jsonify, render_template, request
from flask_cors import CORS
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

app = Flask(__name__)
CORS(app)

_TRANSSEE_BASE = "https://www.transsee.ca"

# Browser-like headers sent with every plain HTTP request to TransSee.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Referer': 'https://www.transsee.ca/',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}
_REQUEST_TIMEOUT_SECONDS = 10

# <p id="..."> values looked up on the route list page, in output order.
_ROUTE_ID_RANGES = (
    range(7, 204),    # all conventional routes
    range(300, 399),  # all blue night routes
    range(900, 999),  # all express routes
)

# Matches the "[lat, lon]" literal inside an AddMarker(...) JavaScript call.
_COORDS_RE = re.compile(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]')

_ALERT_WAIT_SECONDS = 15


def _fetch_soup(path, params):
    """Fetch a TransSee page and return it parsed with BeautifulSoup.

    Args:
        path: Path below the TransSee base URL, e.g. ``"/routelist"``.
        params: Query parameters; requests URL-encodes the values.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-2xx response status.
    """
    response = requests.get(
        _TRANSSEE_BASE + path,
        params=params,
        headers=_REQUEST_HEADERS,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def _clean_route_name(raw):
    """Normalise the text of a route paragraph into "<number> <name>" form."""
    name = re.sub(r"^[^A-Za-z0-9]+", "", raw)  # strip non-alphanumeric at start
    name = re.sub(r"\s*==.*$", "", name)  # strip trailing "== $0" or similar
    name = re.sub(r"\s+", " ", name).strip()  # collapse spaces
    # Replace dash between number and name with space ("26-Dupont" -> "26 Dupont").
    return re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', name)


@app.route('/routelist', methods=['GET'])
def getRouteList():
    """Return a JSON object mapping route id to cleaned route name.

    Only ids inside ``_ROUTE_ID_RANGES`` whose paragraph has non-blank text
    are included. Responds with a JSON error and status 500 when the
    upstream page cannot be fetched.
    """
    try:
        soup = _fetch_soup("/routelist", {"a": "ttc", "ShowAll": "1"})
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch route list: {e}"}), 500

    # Index the paragraphs once instead of scanning the tree for every id.
    # setdefault keeps the first tag per id, matching what soup.find returns.
    first_p_by_id = {}
    for paragraph in soup.find_all("p", id=True):
        first_p_by_id.setdefault(paragraph["id"], paragraph)

    routes = {}
    for id_range in _ROUTE_ID_RANGES:
        for rid in id_range:
            tag = first_p_by_id.get(str(rid))
            if tag is not None and tag.text.strip():
                routes[rid] = _clean_route_name(tag.get_text(strip=True))

    return jsonify(routes)


def _format_delay(raw):
    """Convert scraped delay text into a signed ``[+-]mm:ss`` style string.

    The first space-separated token is the value and the last token decides
    the sign: ``"behind"`` yields ``-``, anything else ``+``. An ``hh:mm:ss``
    value is folded into ``mm:ss`` by adding the hours to the minutes.
    Returns None when no delay text was found.
    """
    if raw is None:
        return None

    tokens = raw.split(' ')
    is_late = tokens[-1].lower() == 'behind'
    value = tokens[0]

    parts = value.split(":")
    if len(parts) == 3:  # hh:mm:ss format
        hours, minutes, seconds = parts
        try:
            value = f"{int(hours) * 60 + int(minutes)}:{seconds}"
        except ValueError:
            # Non-numeric hour/minute field: keep the scraped text unchanged.
            pass

    return ("-" if is_late else "+") + value


def _marker_calls_by_script(soup):
    """Return, per <script> mentioning AddMarker, the text after each ``AddMarker(``."""
    return [
        script.string.split("AddMarker(")[1:]  # [0] is the text before the first call
        for script in soup.find_all("script")
        if script.string and "AddMarker" in script.string
    ]


def _find_coordinates(vehicle_id, calls_by_script):
    """Look up ``(lat, lon)`` for a vehicle in the pre-split AddMarker calls.

    In each script only the first call that mentions the quoted vehicle id is
    inspected; if it carries no usable ``[lat, lon]`` pair the next script is
    tried. Returns ``(None, None)`` when nothing is found.
    """
    if vehicle_id is None:
        return None, None

    needle = f'"{vehicle_id}"'
    for calls in calls_by_script:
        call = next((c for c in calls if needle in c), None)
        if call is None:
            continue
        match = _COORDS_RE.search(call)
        if not match:
            continue
        try:
            return float(match.group(1)), float(match.group(2))
        except ValueError:
            # The pattern also admits strings such as "1.2.3"; treat as absent.
            continue
    return None, None


def _parse_vehicle(paragraph, calls_by_script):
    """Build the vehicle dict for one ``<p id=...>`` entry.

    Returns None when the entry has no direction or no destination text.
    """
    direction = None
    destination = None
    for part in paragraph.stripped_strings:
        if part.startswith("going"):
            direction = part
        elif "to " in part:  # destination line
            destination = part
    if direction is None or destination is None:
        return None

    link = paragraph.find("a")
    vehicle_id = link.get_text(strip=True) if link else None

    # When the first span reads "waiting" the delay is in the second span.
    raw_delay = None
    spans = paragraph.find_all("span")
    if spans:
        first_text = spans[0].get_text(strip=True)
        if "waiting" in first_text.lower() and len(spans) > 1:
            raw_delay = spans[1].get_text(strip=True)
        else:
            raw_delay = first_text

    time_tag = paragraph.find("time")
    timestamp = time_tag.get_text(strip=True) if time_tag else None

    lat, lon = _find_coordinates(vehicle_id, calls_by_script)
    if lat is not None:
        app.logger.debug(
            "Found coordinates for vehicle %s: lat=%s, lon=%s", vehicle_id, lat, lon
        )

    direction_words = direction.split(' ')
    branch = direction_words[-1] if len(direction_words) > 1 else ""

    return {
        "vehicle_id": vehicle_id,
        "delay": _format_delay(raw_delay),
        "time": timestamp,
        "branch": branch,
        "destination": destination,
        "latitude": lat,
        "longitude": lon,
    }


@app.route('/listvehiclesbyroute', methods=['POST'])
def listVehiclesByRoute():
    """Return the vehicles currently listed by TransSee for one route.

    Expects a JSON body of the form ``{"route": <str or int>}``. Responds
    with ``{"route": ..., "vehicles": [...]}``; with status 400 for a missing
    or invalid route, 404 when the page has no ``div.core`` element, and 500
    when the upstream page cannot be fetched.
    """
    # silent=True: a non-JSON body yields None and falls into the 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or "route" not in data:
        return jsonify({"error": "Missing route parameter"}), 400

    route = data["route"]
    is_scalar = isinstance(route, (str, int)) and not isinstance(route, bool)
    if not is_scalar or not str(route).strip():
        return jsonify({"error": "Invalid route parameter"}), 400

    try:
        # Passing the route via params URL-encodes the caller-supplied value.
        soup = _fetch_soup("/routeveh", {"a": "ttc", "r": route})
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch vehicle list: {e}"}), 500

    core_div = soup.find("div", class_="core")
    if not core_div:
        return jsonify({"error": "No vehicles found"}), 404

    # Split the scripts once; every vehicle lookup reuses the result.
    calls_by_script = _marker_calls_by_script(soup)

    vehicles = []
    for paragraph in core_div.find_all("p", id=True):
        vehicle = _parse_vehicle(paragraph, calls_by_script)
        if vehicle is not None:
            vehicles.append(vehicle)

    return jsonify({
        "route": route,
        "vehicles": vehicles,
    })


@app.route("/alerts", methods=["GET"])
def serviceAlerts():
    """Return the ``section.HighAlert`` entries of the TransSee messages page.

    The page is loaded in headless Chrome. Each entry is reported as
    ``{"title": <first <b> text>, "route": <first <div> text>}``. Responds
    with an empty list if no such section appears within the wait period and
    with a JSON error and status 500 on any other WebDriver failure.
    """
    url = "https://www.transsee.ca/showmessages?a=ttc"

    # Headless Chrome
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get(url)
        try:
            # Wait until at least one HighAlert is loaded
            WebDriverWait(driver, _ALERT_WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "section.HighAlert"))
            )
        except TimeoutException:
            # NOTE(review): a timeout is treated as "no high alerts right now";
            # confirm the page really omits the section when there are none.
            return jsonify([])
        html = driver.page_source
    except WebDriverException as e:
        return jsonify({"error": f"Failed to fetch alerts: {e}"}), 500
    finally:
        # Always shut the browser down, otherwise each failure leaks a process.
        if driver is not None:
            driver.quit()

    soup = BeautifulSoup(html, "html.parser")

    alerts = []
    for section in soup.select("section.HighAlert"):
        title_tag = section.find("b")
        route_tag = section.find("div")
        alerts.append({
            "title": title_tag.get_text(strip=True) if title_tag else "",
            "route": route_tag.get_text(strip=True) if route_tag else "",
        })

    return jsonify(alerts)


@app.route('/', methods=['GET'])
def health_check():
    """Liveness probe: always answers 200 with a fixed message."""
    return 'Backend is running!', 200


if __name__ == '__main__':
    app.run(debug=True, port=5000)