# TransseeAPI / app.py
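# Flask API that scrapes TransSee (https://www.transsee.ca) for TTC route, vehicle,
# stop-prediction, and service-alert data and serves it as JSON.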
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import re
import requests
from bs4 import BeautifulSoup
import tempfile
import os
from playwright.sync_api import sync_playwright
from datetime import datetime
import pytz
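# Selenium cache location; the scrapers below use requests and Playwright, so this
# appears to be a leftover/defensive setting rather than something actively used.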
os.environ["SE_CACHE_PATH"] = "/tmp/selenium"
app = Flask(__name__)
CORS(app)
@app.route('/routelist', methods=['GET'])
def getRouteList():
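    """Scrape the TransSee route list page and return {route_id: "NN Route Name"} as JSON."""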
url = "https://www.transsee.ca/routelist?a=ttc&ShowAll=1"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception as e:
return jsonify({"error": f"Failed to fetch route list: {e}"}), 500
soup = BeautifulSoup(response.text, "html.parser")
routes = {}
    # Conventional routes (7-203), Blue Night routes (300-398), and Express routes (900-998)
    for id_range in (range(7, 204), range(300, 399), range(900, 999)):
        for rid in id_range:
            tag = soup.find("p", {"id": str(rid)})
            if tag and tag.text.strip():
                # Clean text:
                text = tag.get_text(strip=True)
                text = re.sub(r"^[^A-Za-z0-9]+", "", text)  # strip non-alphanumeric at start
                text = re.sub(r"\s*==.*$", "", text)  # strip trailing "== $0" or similar
                text = re.sub(r"\s+", " ", text).strip()  # collapse spaces
                # Replace dash between number and name (e.g., "26-Dupont" -> "26 Dupont")
                text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)
                routes[rid] = text
return jsonify(routes)
@app.route('/listvehiclesbyroute', methods=['POST'])
def listVehiclesByRoute():
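    """List active vehicles on a route.

    Expects a JSON body like {"route": "26"}; scrapes the TransSee routeveh page for each
    vehicle's delay, last-report time, branch, destination, and map coordinates.
    """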
data = request.get_json()
if not data or "route" not in data:
return jsonify({"error": "Missing route parameter"}), 400
route = data["route"]
url = f"https://www.transsee.ca/routeveh?a=ttc&r={route}"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception as e:
return jsonify({"error": f"Failed to fetch route list: {e}"}), 500
soup = BeautifulSoup(response.text, "html.parser")
core_div = soup.find("div", class_="core")
if not core_div:
return jsonify({"error": "No vehicles found"}), 404
vehicles = []
all_popups = soup.find_all("div", class_="leaflet-popup-content")
for p in core_div.find_all("p", id=True):
vehicle_id = p.find("a").get_text(strip=True) if p.find("a") else None
spans = p.find_all("span")
delay = None
if spans:
first_text = spans[0].get_text(strip=True).lower()
if "waiting" in first_text and len(spans) > 1:
delay = spans[1].get_text(strip=True)
else:
delay = spans[0].get_text(strip=True)
time = p.find("time").get_text(strip=True) if p.find("time") else None
direction = None
destination = None
for part in p.stripped_strings:
if part.startswith("going"):
direction = part
elif "to " in part: # destination line
destination = part
#print(f"{direction} {destination}")
late = delay.split(' ')[-1].lower() == 'behind'
delay_value = delay.split(' ')[0]
# Convert delay from hh:mm:ss to mm:ss format
if ":" in delay_value:
parts = delay_value.split(":")
if len(parts) == 3: # hh:mm:ss format
try:
h, m, s = parts
# Convert to mm:ss by adding hours to minutes
total_minutes = int(h) * 60 + int(m)
delay_value = f"{total_minutes}:{s}"
                except ValueError:
                    pass
# If it's mm:ss format, keep as is
delay = ("-" if late else "+") + delay_value
# Extract coordinates from JavaScript AddMarker calls
lat, lon = None, None
# Find all script tags and look for AddMarker calls
scripts = soup.find_all("script")
for script in scripts:
if script.string and "AddMarker" in script.string:
# Split the script content by AddMarker calls and find the one with our vehicle ID
add_marker_calls = script.string.split("AddMarker(")
for call in add_marker_calls[1:]: # Skip the first split which is before AddMarker
if f'"{vehicle_id}"' in call:
# Extract coordinates from the first part [lat, lon]
coords_match = re.search(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]', call)
if coords_match:
lat, lon = float(coords_match.group(1)), float(coords_match.group(2))
print(f"Found coordinates for vehicle {vehicle_id}: lat={lat}, lon={lon}")
break
if lat is not None:
break
if direction is None or destination is None: continue
        direction_parts = direction.split(' ')
        branch = direction_parts[-1] if len(direction_parts) > 1 else ""
vehicles.append({
"vehicle_id": vehicle_id,
"delay": delay,
"time": time,
"branch": branch,
"destination": destination,
"latitude": lat,
"longitude": lon
})
return jsonify({
"route": route,
"vehicles": vehicles
})
@app.route("/alerts", methods=["GET"])
def serviceAlerts():
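    """Scrape TransSee's showmessages page and return current high-priority TTC alerts as JSON."""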
url = "https://www.transsee.ca/showmessages?a=ttc"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception as e:
return jsonify({"error": f"Failed to fetch service alerts: {e}"}), 500
soup = BeautifulSoup(response.text, "html.parser")
alerts = []
for section in soup.select("section.HighAlert"):
title = section.find("b").get_text(strip=True) if section.find("b") else ""
route = section.find("div").get_text(strip=True) if section.find("div") else ""
alerts.append({
"title": title,
"route": route
})
return jsonify(alerts)
def getVehicleInfo(vehicle_id):
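    """Look up a vehicle on TransSee's fleetfind page.

    Returns a (direction, destination) tuple; either value may be None if it cannot be
    parsed from the page or the page cannot be fetched.
    """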
url = f'https://www.transsee.ca/fleetfind?a=ttc&findtrack=1&q={vehicle_id}&Go=Go'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1', # Do Not Track
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
    except Exception:
        # This helper is called from /seek and must return a (direction, destination)
        # tuple, not a Flask response, so fail soft on network errors.
        return None, None
soup = BeautifulSoup(response.text, 'html.parser')
# Find the specific paragraph with id=vehicle_id
p = soup.find("p", id=vehicle_id)
    if not p:
        # Vehicle not found on the page; the caller expects a tuple, not an error response
        return None, None
# Get the HTML content to properly parse the structure
html_content = str(p)
# Extract direction (like "going D")
direction = None
direction_match = re.search(r'going\s+([A-Za-z0-9]+)', html_content)
if direction_match:
direction = direction_match.group(1)
# Extract destination - only the text before <br>
destination = None
# Split by <br> and get only the first part
before_br = html_content.split('<br/>')[0].split('<br>')[0]
# Look for the quoted destination text that contains direction words
dest_match = re.search(r'"([^"]*(?:South|North|East|West)[^"]*)"', before_br)
if dest_match:
raw_dest = dest_match.group(1).strip()
# Clean up - remove anything after "at " or "on " which indicates location details
clean_dest = re.sub(r'at\s+.*$', '', raw_dest)
destination = clean_dest.strip()
else:
# Fallback: extract text content and look for direction patterns
temp_soup = BeautifulSoup(before_br, 'html.parser')
text_content = temp_soup.get_text()
dest_match = re.search(r'((?:South|North|East|West) to [^=]*?)(?=\s*==|\s*$)', text_content)
if dest_match:
raw_dest = dest_match.group(1).strip()
clean_dest = re.sub(r'at\s+.*$', '', raw_dest)
destination = clean_dest.strip()
return direction, destination
@app.route('/seek', methods=['POST'])
def seek():
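    """Return the routes and predicted vehicles serving a stop.

    Expects a JSON body like {"stop": "<stop id>"}; renders the TransSee smsstop page with
    Playwright (so redirects and dynamically loaded predictions resolve) and scrapes the result.
    """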
if not request.is_json:
return jsonify({'error': 'Request must be JSON'}), 400
data = request.get_json()
stop_id = data.get('stop')
    if not stop_id:
        return jsonify({'error': 'Stop ID is required'}), 400
    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1', # Do Not Track
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
# Use Playwright to handle redirects and wait for page to load
with sync_playwright() as p:
# Find where Playwright actually installed Chrome
        import glob
# Try different possible locations
possible_locations = [
"/root/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
"/home/user/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
"/tmp/playwright-browsers/chromium-*/chrome-linux/chrome",
"/usr/lib/chromium-browser/chromium-browser",
"/usr/bin/chromium-browser"
]
executable_path = None
for pattern in possible_locations:
if "*" in pattern:
paths = glob.glob(pattern)
if paths:
executable_path = paths[0]
print(f"Found Chrome at: {executable_path}")
break
else:
if os.path.exists(pattern):
executable_path = pattern
print(f"Found Chrome at: {executable_path}")
break
if not executable_path:
# Debug: show what's actually there
print("Chrome not found. Checking directories:")
for check_dir in ["/root/.cache", "/home", "/tmp", "/usr/bin"]:
if os.path.exists(check_dir):
print(f"Contents of {check_dir}:")
os.system(f"find {check_dir} -name '*chrome*' -o -name '*chromium*' 2>/dev/null | head -10")
# Try to let Playwright find it automatically
try:
browser = p.chromium.launch(headless=True)
print("Playwright found Chrome automatically")
except Exception as e:
raise Exception(f"Chrome not found anywhere and Playwright can't find it: {e}")
else:
browser = p.chromium.launch(headless=True, executable_path=executable_path)
print(f"Using Chrome executable: {executable_path}")
page = browser.new_page()
page.goto(url)
# Wait for divp elements to load
try:
page.wait_for_selector("div.divp", timeout=15000)
        except Exception:
            # The stop may have no predictions; continue with whatever the page rendered
            pass
html = page.content()
browser.close()
soup = BeautifulSoup(html, 'html.parser')
# Extract routes from the <p> tags that contain <b> tags
routes = []
for p_tag in soup.find_all('p', id=re.compile(r'^\d+_\d+$')):
b_tag = p_tag.find('b')
if b_tag:
route_link = b_tag.find('a', href=re.compile(r'stoplist\?a=ttc&r=\d+'))
if route_link:
route_text = route_link.get_text(strip=True)
# Remove dash between number and name (e.g., "133-Neilson" -> "133 Neilson")
route_text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', route_text)
# Get the full text content of the p tag for branch and destination
full_text = p_tag.get_text()
# Extract branch (letter/number after "going" if it exists)
branch = None
branch_match = re.search(r'going\s+([A-Za-z0-9]+)', full_text)
if branch_match:
branch_text = branch_match.group(1)
# Only treat single characters/numbers as branches (A, B, 1, 2, etc.)
if len(branch_text) == 1 and branch_text.isalnum():
branch = branch_text
# Extract destination (everything after "to" until the end or next punctuation)
destination = None
destination_match = re.search(r'to\s+([^.]+)', full_text)
if destination_match:
destination = destination_match.group(1).strip()
routes.append({
'name': route_text,
'branch': branch,
'destination': destination
})
pattern = re.compile(r'^\d{1,3}_\d{3,5}_[1-4]$')
vehicles = []
for div_tag in soup.find_all('div', class_='divp', id=pattern):
vehicle_id = div_tag.get('id')
id_parts = vehicle_id.split('_')
route = id_parts[0] # Extract first 1-3 digits as route
vehicle_data = {
'route': route,
}
# Get both timedisp times
timedisp_elements = div_tag.find_all('time', class_='timedisp')
if len(timedisp_elements) >= 2:
vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
vehicle_data['scheduled'] = timedisp_elements[1].get_text(strip=True)
elif len(timedisp_elements) == 1:
vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
vehicle_data['scheduled'] = None
else:
vehicle_data['actual'] = None
vehicle_data['scheduled'] = None
# If actual is null, replace with current time (bus is at stop)
if vehicle_data['actual'] is None:
# Get current time in EST/EDT timezone
est_tz = pytz.timezone('America/New_York')
current_time = datetime.now(est_tz)
vehicle_data['actual'] = current_time.strftime("%I:%M:%S%p")
# Get delay/ahead status (like "1:10 ahead")
delay_span = None
for span in div_tag.find_all('span', style=True):
if 'color: light-dark' in span['style']:
span_text = span.get_text(strip=True)
if 'ahead' in span_text or 'behind' in span_text:
delay_span = span
break
if delay_span:
delay_text = delay_span.get_text(strip=True)
if 'ahead' in delay_text:
vehicle_data['delay'] = "+" + delay_text.replace(' ahead', '')
elif 'behind' in delay_text:
vehicle_data['delay'] = "-" + delay_text.replace(' behind', '')
else:
vehicle_data['delay'] = delay_text
else:
vehicle_data['delay'] = "0"
# Get vehicle number from #MapMain link or plain text
vehicle_link = div_tag.find('a', href="#MapMain")
if vehicle_link:
vehicle_data['vehicle_number'] = vehicle_link.get_text(strip=True)
else:
# Look for vehicle number in plain text like "Vehicle 1243 Load"
text = div_tag.get_text()
match = re.search(r'Vehicle\s+(\d+)', text)
if match:
vehicle_data['vehicle_number'] = match.group(1)
else:
vehicle_data['vehicle_number'] = None
if vehicle_data['vehicle_number'] is None: continue
vehicle_data['direction'], vehicle_data['destination'] = getVehicleInfo(vehicle_data['vehicle_number'])
vehicles.append(vehicle_data)
return jsonify({
'stop': stop_id,
'routes': routes,
'vehicles': vehicles
})
@app.route('/', methods=['GET'])
def health_check():
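    """Simple health check used to verify the backend is up."""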
return 'Backend is running!', 200
if __name__ == "__main__":
port = int(os.environ.get("PORT", 7860))
app.run(host="0.0.0.0", port=port, debug=False)
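
# Example requests once the server is running (default port 7860; the stop id below is a placeholder):
#   curl http://localhost:7860/routelist
#   curl http://localhost:7860/alerts
#   curl -X POST -H "Content-Type: application/json" -d '{"route": "26"}' http://localhost:7860/listvehiclesbyroute
#   curl -X POST -H "Content-Type: application/json" -d '{"stop": "1234"}' http://localhost:7860/seek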