Spaces:

42Cummer
/

TransseeAPI

Sleeping

App Files Files Community

TransseeAPI / app.py

42Cummer

GER

4e02624 verified 4 months ago

raw

history blame

15.7 kB

	import os
	import re
	import tempfile
	from datetime import datetime
	from urllib.parse import urlencode

	import pytz
	import requests
	from bs4 import BeautifulSoup
	from flask import Flask, render_template, request, jsonify
	from flask_cors import CORS
	from selenium import webdriver
	from selenium.common.exceptions import WebDriverException
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.chrome.service import Service
	from selenium.webdriver.common.by import By
	from selenium.webdriver.support import expected_conditions as EC
	from selenium.webdriver.support.ui import WebDriverWait
	from webdriver_manager.chrome import ChromeDriverManager

	# Set SE_CACHE_PATH before any WebDriver is created.
	# NOTE(review): presumably this is the cache directory Selenium uses for
	# downloaded drivers, pointed at /tmp because it is writable in the
	# deployment container -- confirm.
	os.environ["SE_CACHE_PATH"] = "/tmp/selenium"

	# Flask application object; the route handlers in this file register on it.
	app = Flask(__name__)
	# Enable CORS on the app using flask_cors defaults.
	CORS(app)

	import re
	import requests
	from bs4 import BeautifulSoup
	from flask import jsonify

	# Numeric <p id="..."> ranges collected from the TransSee route list page.
	_ROUTE_ID_RANGES = (
	    range(7, 204),     # conventional routes
	    range(300, 400),   # blue night routes (end is exclusive, so 399 is included)
	    range(900, 1000),  # express routes (end is exclusive, so 999 is included)
	)


	def _clean_route_name(raw_text):
	    """Normalise the scraped text of one route entry.

	    Strips leading non-alphanumeric symbols and a trailing "== ..." suffix,
	    collapses whitespace, and turns "26-Dupont" into "26 Dupont".
	    """
	    text = re.sub(r"^[^A-Za-z0-9]+", "", raw_text)  # leading symbols like "*"
	    text = re.sub(r"\s*==.*$", "", text)            # trailing "== $0" or similar
	    text = re.sub(r"\s+", " ", text).strip()        # collapse spaces
	    return re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)


	@app.route('/routelist', methods=['GET'])
	def getRouteList():
	    """Return a JSON object mapping TTC route number to cleaned route name.

	    Fetches the TransSee route list page and reads the ``<p>`` elements whose
	    ``id`` is a route number inside ``_ROUTE_ID_RANGES``.

	    Returns:
	        A JSON response ``{route_id: name, ...}``, or
	        ``({"error": ...}, 500)`` when the upstream request fails.
	    """
	    url = "https://www.transsee.ca/routelist?a=ttc&ShowAll=1"
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
	        'Referer': 'https://www.transsee.ca/',
	        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	        'Accept-Language': 'en-US,en;q=0.9',
	        'DNT': '1',
	        'Connection': 'keep-alive',
	        'Upgrade-Insecure-Requests': '1',
	    }
	    try:
	        response = requests.get(url, headers=headers, timeout=10)
	        response.raise_for_status()
	    except requests.RequestException as e:
	        return jsonify({"error": f"Failed to fetch route list: {e}"}), 500

	    soup = BeautifulSoup(response.text, "html.parser")

	    # Index the page once instead of scanning the whole tree for every
	    # candidate id; setdefault keeps the first <p> for a given id, which is
	    # what soup.find() would have returned.
	    first_by_id = {}
	    for p_tag in soup.find_all("p", id=True):
	        first_by_id.setdefault(p_tag["id"], p_tag)

	    routes = {}
	    for id_range in _ROUTE_ID_RANGES:
	        for rid in id_range:
	            tag = first_by_id.get(str(rid))
	            if tag is not None and tag.text.strip():
	                routes[rid] = _clean_route_name(tag.get_text(strip=True))
	    return jsonify(routes)

	def _signed_vehicle_delay(raw_delay):
	    """Convert scraped delay text such as "0:01:10 behind" into "-1:10".

	    The sign is "-" when the last word is "behind" and "+" otherwise. An
	    hh:mm:ss value is folded into mm:ss; any other value is kept as scraped.
	    Returns None when no delay text was found.
	    """
	    if raw_delay is None:
	        return None
	    words = raw_delay.split(' ')
	    late = words[-1].lower() == 'behind'
	    value = words[0]
	    parts = value.split(":")
	    if len(parts) == 3:  # hh:mm:ss -> mm:ss by adding hours to minutes
	        hours, minutes, seconds = parts
	        try:
	            value = f"{int(hours) * 60 + int(minutes)}:{seconds}"
	        except ValueError:
	            pass  # non-numeric fields: keep the value exactly as scraped
	    return ("-" if late else "+") + value


	def _collect_marker_calls(soup):
	    """Return the argument text of every ``AddMarker(`` call in inline scripts."""
	    calls = []
	    for script in soup.find_all("script"):
	        source = script.string
	        if source and "AddMarker" in source:
	            # The first split chunk is whatever precedes the first call.
	            calls.extend(source.split("AddMarker(")[1:])
	    return calls


	def _marker_coordinates(marker_calls, vehicle_id):
	    """Return (lat, lon) from the first marker call that quotes vehicle_id.

	    Returns (None, None) when no call mentions the vehicle or none of the
	    matching calls contains a parseable ``[lat, lon]`` pair.
	    """
	    if vehicle_id is None:
	        return None, None
	    needle = f'"{vehicle_id}"'
	    for call in marker_calls:
	        if needle not in call:
	            continue
	        coords_match = re.search(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]', call)
	        if not coords_match:
	            continue
	        try:
	            return float(coords_match.group(1)), float(coords_match.group(2))
	        except ValueError:
	            continue  # e.g. "1.2.3" satisfies the character class but is not a number
	    return None, None


	@app.route('/listvehiclesbyroute', methods=['POST'])
	def listVehiclesByRoute():
	    """List the vehicles currently reported on one TTC route.

	    Expects a JSON body ``{"route": <route>}`` and scrapes the TransSee
	    route-vehicle page for that route.

	    Returns:
	        ``{"route": ..., "vehicles": [...]}`` where each vehicle has
	        ``vehicle_id``, ``delay`` ("+"/"-" prefixed, or None when the page
	        shows none), ``time``, ``branch``, ``destination``, ``latitude`` and
	        ``longitude``. Rows without both a direction and a destination are
	        skipped. Errors are returned as ``({"error": ...}, status)`` with
	        400 (missing route), 404 (no vehicle container on the page) or
	        500 (upstream request failed).
	    """
	    # silent=True so a non-JSON body yields the JSON 400 below.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict) or "route" not in data:
	        return jsonify({"error": "Missing route parameter"}), 400
	    route = data["route"]

	    url = "https://www.transsee.ca/routeveh"
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
	        'Referer': 'https://www.transsee.ca/',
	        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	        'Accept-Language': 'en-US,en;q=0.9',
	        'DNT': '1',
	        'Connection': 'keep-alive',
	        'Upgrade-Insecure-Requests': '1',
	    }
	    try:
	        # Passing the route through params= lets requests URL-encode the
	        # client-supplied value instead of splicing it into the URL.
	        response = requests.get(
	            url,
	            params={"a": "ttc", "r": str(route)},
	            headers=headers,
	            timeout=10,
	        )
	        response.raise_for_status()
	    except requests.RequestException as e:
	        return jsonify({"error": f"Failed to fetch vehicle list: {e}"}), 500

	    soup = BeautifulSoup(response.text, "html.parser")
	    core_div = soup.find("div", class_="core")
	    if not core_div:
	        return jsonify({"error": "No vehicles found"}), 404

	    # Coordinates live in inline JavaScript; gather the calls once rather
	    # than re-scanning every script for every vehicle.
	    marker_calls = _collect_marker_calls(soup)

	    vehicles = []
	    for p in core_div.find_all("p", id=True):
	        direction = None
	        destination = None
	        for part in p.stripped_strings:
	            if part.startswith("going"):
	                direction = part
	            elif "to " in part:  # destination line
	                destination = part
	        if direction is None or destination is None:
	            continue

	        link = p.find("a")
	        vehicle_id = link.get_text(strip=True) if link is not None else None

	        # When the first span is a "waiting" notice the delay is in the next one.
	        raw_delay = None
	        spans = p.find_all("span")
	        if spans:
	            first_text = spans[0].get_text(strip=True)
	            if "waiting" in first_text.lower() and len(spans) > 1:
	                raw_delay = spans[1].get_text(strip=True)
	            else:
	                raw_delay = first_text

	        time_tag = p.find("time")
	        time_text = time_tag.get_text(strip=True) if time_tag is not None else None

	        lat, lon = _marker_coordinates(marker_calls, vehicle_id)
	        if lat is not None:
	            app.logger.debug(
	                "Found coordinates for vehicle %s: lat=%s, lon=%s",
	                vehicle_id, lat, lon,
	            )

	        # The branch is the last word of the "going ..." text, if there is one.
	        direction_words = direction.split(' ')
	        branch = direction_words[-1] if len(direction_words) > 1 else ""

	        vehicles.append({
	            "vehicle_id": vehicle_id,
	            "delay": _signed_vehicle_delay(raw_delay),
	            "time": time_text,
	            "branch": branch,
	            "destination": destination,
	            "latitude": lat,
	            "longitude": lon,
	        })

	    return jsonify({
	        "route": route,
	        "vehicles": vehicles,
	    })

	@app.route("/alerts", methods=["GET"])
	def serviceAlerts():
	    """Return the TTC service alerts shown on TransSee as a JSON list.

	    Each ``section.HighAlert`` element on the messages page becomes
	    ``{"title": <text of its first <b>>, "route": <text of its first <div>>}``;
	    a missing element yields an empty string.

	    Returns:
	        A JSON list of alerts, or ``({"error": ...}, 500)`` when the
	        upstream request fails.
	    """
	    url = "https://www.transsee.ca/showmessages?a=ttc"
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
	        'Referer': 'https://www.transsee.ca/',
	        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	        'Accept-Language': 'en-US,en;q=0.9',
	        'DNT': '1',
	        'Connection': 'keep-alive',
	        'Upgrade-Insecure-Requests': '1',
	    }

	    try:
	        response = requests.get(url, headers=headers, timeout=10)
	        response.raise_for_status()
	    except requests.RequestException as e:
	        return jsonify({"error": f"Failed to fetch service alerts: {e}"}), 500

	    soup = BeautifulSoup(response.text, "html.parser")

	    alerts = []
	    for section in soup.select("section.HighAlert"):
	        title_tag = section.find("b")
	        route_tag = section.find("div")
	        alerts.append({
	            "title": title_tag.get_text(strip=True) if title_tag is not None else "",
	            "route": route_tag.get_text(strip=True) if route_tag is not None else "",
	        })

	    return jsonify(alerts)

	# ids of the <p> elements that describe a route serving the stop.
	_STOP_ROUTE_P_ID = re.compile(r'^\d+_\d+$')
	# href of the route link inside such a <p>.
	_STOP_ROUTE_HREF = re.compile(r'stoplist\?a=ttc&r=\d+')
	# ids of the per-vehicle prediction <div class="divp"> elements.
	_STOP_PREDICTION_DIV_ID = re.compile(r'^\d{1,3}_\d{3,5}_[1-4]$')


	def _load_stop_page(url):
	    """Render ``url`` in headless Chrome and return the resulting HTML.

	    Waits up to 15 seconds for an element with class ``divp``; if the wait
	    fails, whatever has rendered so far is returned. The driver is always
	    shut down, even when navigation fails.

	    Raises:
	        WebDriverException: if Chrome cannot be started, the page cannot be
	            loaded, or the page source cannot be read.
	    """
	    options = Options()
	    options.add_argument("--headless=new")
	    options.add_argument("--disable-gpu")
	    options.add_argument("--no-sandbox")
	    options.add_argument("--disable-dev-shm-usage")
	    # NOTE(review): a fixed debugging port looks like it could collide when
	    # two requests start Chrome at the same time -- confirm before changing.
	    options.add_argument("--remote-debugging-port=9222")

	    # Use ChromeDriver from /tmp location
	    driver = webdriver.Chrome(service=Service("/tmp/chromedriver"), options=options)
	    try:
	        driver.get(url)
	        try:
	            WebDriverWait(driver, 15).until(
	                EC.presence_of_element_located((By.CLASS_NAME, "divp"))
	            )
	        except WebDriverException:
	            # Best effort (covers the wait timing out): fall through and
	            # parse whatever the page contains.
	            pass
	        return driver.page_source
	    finally:
	        driver.quit()


	def _parse_stop_routes(soup):
	    """Extract ``{"name", "branch", "destination"}`` for each route at the stop."""
	    routes = []
	    for p_tag in soup.find_all('p', id=_STOP_ROUTE_P_ID):
	        b_tag = p_tag.find('b')
	        if not b_tag:
	            continue
	        route_link = b_tag.find('a', href=_STOP_ROUTE_HREF)
	        if not route_link:
	            continue

	        # "133-Neilson" -> "133 Neilson"
	        route_text = re.sub(
	            r'(\d+)-([A-Za-z])', r'\1 \2', route_link.get_text(strip=True)
	        )
	        full_text = p_tag.get_text()

	        # Only a single letter/digit after "going" counts as a branch.
	        branch = None
	        branch_match = re.search(r'going\s+([A-Za-z0-9]+)', full_text)
	        if branch_match and len(branch_match.group(1)) == 1:
	            branch = branch_match.group(1)

	        # \b keeps the "to" at the end of a word (e.g. "Toronto Zoo") from
	        # being mistaken for the "to <destination>" marker.
	        destination = None
	        destination_match = re.search(r'\bto\s+([^.]+)', full_text)
	        if destination_match:
	            destination = destination_match.group(1).strip()

	        routes.append({
	            'name': route_text,
	            'branch': branch,
	            'destination': destination,
	        })
	    return routes


	def _parse_stop_vehicles(soup):
	    """Extract one prediction dict per ``div.divp`` element on the stop page.

	    Each dict has ``route``, ``actual``, ``scheduled``, ``delay`` and
	    ``vehicle_number``.
	    """
	    local_tz = pytz.timezone('America/New_York')
	    vehicles = []
	    for div_tag in soup.find_all('div', class_='divp', id=_STOP_PREDICTION_DIV_ID):
	        # The id looks like "<route>_<digits>_<1-4>"; the first field is the route.
	        route = div_tag.get('id').split('_')[0]

	        times = [
	            t.get_text(strip=True)
	            for t in div_tag.find_all('time', class_='timedisp')
	        ]
	        actual = times[0] if times else None
	        scheduled = times[1] if len(times) >= 2 else None
	        if actual is None:
	            # No predicted time: substitute the current local time
	            # (the original comment says the bus is at the stop).
	            actual = datetime.now(local_tz).strftime("%I:%M:%S%p")

	        # Delay text such as "1:10 ahead": "+" for ahead, "-" for behind,
	        # "0" when the page shows neither.
	        delay = "0"
	        for span in div_tag.find_all('span', style=True):
	            if 'color: light-dark' not in span['style']:
	                continue
	            span_text = span.get_text(strip=True)
	            if 'ahead' in span_text:
	                delay = "+" + span_text.replace(' ahead', '')
	                break
	            if 'behind' in span_text:
	                delay = "-" + span_text.replace(' behind', '')
	                break

	        # Vehicle number from the #MapMain link, else from text like
	        # "Vehicle 1243 Load".
	        vehicle_link = div_tag.find('a', href="#MapMain")
	        if vehicle_link:
	            vehicle_number = vehicle_link.get_text(strip=True)
	        else:
	            number_match = re.search(r'Vehicle\s+(\d+)', div_tag.get_text())
	            vehicle_number = number_match.group(1) if number_match else None

	        vehicles.append({
	            'route': route,
	            'actual': actual,
	            'scheduled': scheduled,
	            'delay': delay,
	            'vehicle_number': vehicle_number,
	        })
	    return vehicles


	@app.route('/seek', methods=['POST'])
	def seek():
	    """Return the routes and upcoming vehicles for one TTC stop.

	    Expects a JSON body ``{"stop": <stop id>}``. The TransSee stop page is
	    rendered with headless Chrome and then parsed.

	    Returns:
	        ``{"stop": ..., "routes": [...], "vehicles": [...]}``, or
	        ``({"error": ...}, status)`` with 400 for a non-JSON body or a
	        missing stop id and 500 when the page could not be loaded.
	    """
	    if not request.is_json:
	        return jsonify({'error': 'Request must be JSON'}), 400

	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({'error': 'Request must be JSON'}), 400

	    stop_id = data.get('stop')
	    if not stop_id:
	        # A missing parameter is a client error (400), matching the
	        # missing-route response of /listvehiclesbyroute.
	        return jsonify({'error': 'Stop ID is required'}), 400

	    # urlencode escapes the client-supplied stop id before it reaches the URL.
	    url = "https://www.transsee.ca/smsstop?" + urlencode({'a': 'ttc', 'id': stop_id})

	    try:
	        html = _load_stop_page(url)
	    except WebDriverException as e:
	        return jsonify({'error': f'Failed to fetch stop page: {e}'}), 500

	    soup = BeautifulSoup(html, 'html.parser')
	    return jsonify({
	        'stop': stop_id,
	        'routes': _parse_stop_routes(soup),
	        'vehicles': _parse_stop_vehicles(soup),
	    })

	@app.route('/', methods=['GET'])
	def health_check():
	    """Liveness probe: reply with a fixed plain-text message and HTTP 200."""
	    status_message = 'Backend is running!'
	    return status_message, 200

	if __name__ == '__main__':
	    # Start Flask's built-in server on port 5000 when run as a script.
	    # Debug mode stays on by default (as before) but can now be turned off
	    # without editing the file by setting FLASK_DEBUG to 0/false/no/off.
	    # NOTE(review): debug mode enables the interactive debugger and should
	    # not be reachable from untrusted networks.
	    debug_setting = os.environ.get("FLASK_DEBUG", "1").strip().lower()
	    app.run(debug=debug_setting not in ("0", "false", "no", "off"), port=5000)