# TransseeAPI / app.py
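# Flask API that scrapes TransSee (https://www.transsee.ca) for TTC route, vehicle,
# stop-prediction, and service-alert data and serves it as JSON.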
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import re
import requests
from bs4 import BeautifulSoup
import tempfile
import os
from playwright.sync_api import sync_playwright
from datetime import datetime
import pytz
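# Selenium cache location; the scrapers below use requests and Playwright, so this
# appears to be a leftover/defensive setting rather than something actively used.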
os.environ["SE_CACHE_PATH"] = "/tmp/selenium"
app = Flask(__name__)
CORS(app)
@app.route('/routelist', methods=['GET'])
def getRouteList():
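    """Scrape the TransSee route list page and return {route_id: "NN Route Name"} as JSON."""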
url = "https://www.transsee.ca/routelist?a=ttc&ShowAll=1"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception as e:
return jsonify({"error": f"Failed to fetch route list: {e}"}), 500
soup = BeautifulSoup(response.text, "html.parser")
routes = {}
    # Conventional routes (7-203), Blue Night routes (300-398), and Express routes (900-998)
    for id_range in (range(7, 204), range(300, 399), range(900, 999)):
        for rid in id_range:
            tag = soup.find("p", {"id": str(rid)})
            if tag and tag.text.strip():
                # Clean text:
                text = tag.get_text(strip=True)
                text = re.sub(r"^[^A-Za-z0-9]+", "", text)  # strip non-alphanumeric at start
                text = re.sub(r"\s*==.*$", "", text)  # strip trailing "== $0" or similar
                text = re.sub(r"\s+", " ", text).strip()  # collapse spaces
                # Replace dash between number and name (e.g., "26-Dupont" -> "26 Dupont")
                text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)
                routes[rid] = text
return jsonify(routes)
@app.route('/listvehiclesbyroute', methods=['POST'])
def listVehiclesByRoute():
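    """List active vehicles on a route.

    Expects a JSON body like {"route": "26"}; scrapes the TransSee routeveh page for each
    vehicle's delay, last-report time, branch, destination, and map coordinates.
    """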
data = request.get_json()
if not data or "route" not in data:
return jsonify({"error": "Missing route parameter"}), 400
route = data["route"]
url = f"https://www.transsee.ca/routeveh?a=ttc&r={route}"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception as e:
return jsonify({"error": f"Failed to fetch route list: {e}"}), 500
soup = BeautifulSoup(response.text, "html.parser")
core_div = soup.find("div", class_="core")
if not core_div:
return jsonify({"error": "No vehicles found"}), 404
vehicles = []
all_popups = soup.find_all("div", class_="leaflet-popup-content")
for p in core_div.find_all("p", id=True):
vehicle_id = p.find("a").get_text(strip=True) if p.find("a") else None
spans = p.find_all("span")
delay = None
if spans:
first_text = spans[0].get_text(strip=True).lower()
if "waiting" in first_text and len(spans) > 1:
delay = spans[1].get_text(strip=True)
else:
delay = spans[0].get_text(strip=True)
time = p.find("time").get_text(strip=True) if p.find("time") else None
direction = None
destination = None
for part in p.stripped_strings:
if part.startswith("going"):
direction = part
elif "to " in part: # destination line
destination = part
#print(f"{direction} {destination}")
late = delay.split(' ')[-1].lower() == 'behind'
delay_value = delay.split(' ')[0]
# Convert delay from hh:mm:ss to mm:ss format
if ":" in delay_value:
parts = delay_value.split(":")
if len(parts) == 3: # hh:mm:ss format
try:
h, m, s = parts
# Convert to mm:ss by adding hours to minutes
total_minutes = int(h) * 60 + int(m)
delay_value = f"{total_minutes}:{s}"
                except ValueError:
                    pass
# If it's mm:ss format, keep as is
delay = ("-" if late else "+") + delay_value
# Extract coordinates from JavaScript AddMarker calls
lat, lon = None, None
# Find all script tags and look for AddMarker calls
scripts = soup.find_all("script")
for script in scripts:
if script.string and "AddMarker" in script.string:
# Split the script content by AddMarker calls and find the one with our vehicle ID
add_marker_calls = script.string.split("AddMarker(")
for call in add_marker_calls[1:]: # Skip the first split which is before AddMarker
if f'"{vehicle_id}"' in call:
# Extract coordinates from the first part [lat, lon]
coords_match = re.search(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]', call)
if coords_match:
lat, lon = float(coords_match.group(1)), float(coords_match.group(2))
print(f"Found coordinates for vehicle {vehicle_id}: lat={lat}, lon={lon}")
break
if lat is not None:
break
if direction is None or destination is None: continue
        direction_parts = direction.split(' ')
        branch = direction_parts[-1] if len(direction_parts) > 1 else ""
vehicles.append({
"vehicle_id": vehicle_id,
"delay": delay,
"time": time,
"branch": branch,
"destination": destination,
"latitude": lat,
"longitude": lon
})
return jsonify({
"route": route,
"vehicles": vehicles
})
@app.route("/alerts", methods=["GET"])
def serviceAlerts():
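    """Scrape TransSee's showmessages page and return current high-priority TTC alerts as JSON."""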
url = "https://www.transsee.ca/showmessages?a=ttc"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception as e:
return jsonify({"error": f"Failed to fetch service alerts: {e}"}), 500
soup = BeautifulSoup(response.text, "html.parser")
alerts = []
for section in soup.select("section.HighAlert"):
title = section.find("b").get_text(strip=True) if section.find("b") else ""
route = section.find("div").get_text(strip=True) if section.find("div") else ""
alerts.append({
"title": title,
"route": route
})
return jsonify(alerts)
def getVehicleInfo(vehicle_id):
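    """Look up a vehicle on TransSee's fleetfind page.

    Returns a (direction, destination) tuple; either value may be None if it cannot be
    parsed from the page or the page cannot be fetched.
    """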
url = f'https://www.transsee.ca/fleetfind?a=ttc&findtrack=1&q={vehicle_id}&Go=Go'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1', # Do Not Track
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
    except Exception:
        # This helper is called from /seek and must return a (direction, destination)
        # tuple, not a Flask response, so fail soft on network errors.
        return None, None
soup = BeautifulSoup(response.text, 'html.parser')
# Find the specific paragraph with id=vehicle_id
p = soup.find("p", id=vehicle_id)
    if not p:
        # Vehicle not found on the page; the caller expects a tuple, not an error response
        return None, None
# Get the HTML content to properly parse the structure
html_content = str(p)
# Extract direction (like "going D")
direction = None
direction_match = re.search(r'going\s+([A-Za-z0-9]+)', html_content)
if direction_match:
direction = direction_match.group(1)
# Extract destination - only the text before <br>
destination = None
# Split by <br> and get only the first part
before_br = html_content.split('<br/>')[0].split('<br>')[0]
# Look for the quoted destination text that contains direction words
dest_match = re.search(r'"([^"]*(?:South|North|East|West)[^"]*)"', before_br)
if dest_match:
raw_dest = dest_match.group(1).strip()
# Clean up - remove anything after "at " or "on " which indicates location details
clean_dest = re.sub(r'at\s+.*$', '', raw_dest)
destination = clean_dest.strip()
else:
# Fallback: extract text content and look for direction patterns
temp_soup = BeautifulSoup(before_br, 'html.parser')
text_content = temp_soup.get_text()
dest_match = re.search(r'((?:South|North|East|West) to [^=]*?)(?=\s*==|\s*$)', text_content)
if dest_match:
raw_dest = dest_match.group(1).strip()
clean_dest = re.sub(r'at\s+.*$', '', raw_dest)
destination = clean_dest.strip()
return direction, destination
@app.route('/seek', methods=['POST'])
def seek():
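    """Return the routes and predicted vehicles serving a stop.

    Expects a JSON body like {"stop": "<stop id>"}; renders the TransSee smsstop page with
    Playwright (so redirects and dynamically loaded predictions resolve) and scrapes the result.
    """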
if not request.is_json:
return jsonify({'error': 'Request must be JSON'}), 400
data = request.get_json()
stop_id = data.get('stop')
    if not stop_id:
        return jsonify({'error': 'Stop ID is required'}), 400
    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Referer': 'https://www.transsee.ca/',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'DNT': '1', # Do Not Track
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
# Use Playwright to handle redirects and wait for page to load
with sync_playwright() as p:
# Find where Playwright actually installed Chrome
        import glob
# Try different possible locations
possible_locations = [
"/root/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
"/home/user/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
"/tmp/playwright-browsers/chromium-*/chrome-linux/chrome",
"/usr/lib/chromium-browser/chromium-browser",
"/usr/bin/chromium-browser"
]
executable_path = None
for pattern in possible_locations:
if "*" in pattern:
paths = glob.glob(pattern)
if paths:
executable_path = paths[0]
print(f"Found Chrome at: {executable_path}")
break
else:
if os.path.exists(pattern):
executable_path = pattern
print(f"Found Chrome at: {executable_path}")
break
if not executable_path:
# Debug: show what's actually there
print("Chrome not found. Checking directories:")
for check_dir in ["/root/.cache", "/home", "/tmp", "/usr/bin"]:
if os.path.exists(check_dir):
print(f"Contents of {check_dir}:")
os.system(f"find {check_dir} -name '*chrome*' -o -name '*chromium*' 2>/dev/null | head -10")
# Try to let Playwright find it automatically
try:
browser = p.chromium.launch(headless=True)
print("Playwright found Chrome automatically")
except Exception as e:
raise Exception(f"Chrome not found anywhere and Playwright can't find it: {e}")
else:
browser = p.chromium.launch(headless=True, executable_path=executable_path)
print(f"Using Chrome executable: {executable_path}")
page = browser.new_page()
page.goto(url)
# Wait for divp elements to load
try:
page.wait_for_selector("div.divp", timeout=15000)
        except Exception:
            # The stop may have no predictions; continue with whatever the page rendered
            pass
html = page.content()
browser.close()
soup = BeautifulSoup(html, 'html.parser')
# Extract routes from the <p> tags that contain <b> tags
routes = []
for p_tag in soup.find_all('p', id=re.compile(r'^\d+_\d+$')):
b_tag = p_tag.find('b')
if b_tag:
route_link = b_tag.find('a', href=re.compile(r'stoplist\?a=ttc&r=\d+'))
if route_link:
route_text = route_link.get_text(strip=True)
# Remove dash between number and name (e.g., "133-Neilson" -> "133 Neilson")
route_text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', route_text)
# Get the full text content of the p tag for branch and destination
full_text = p_tag.get_text()
# Extract branch (letter/number after "going" if it exists)
branch = None
branch_match = re.search(r'going\s+([A-Za-z0-9]+)', full_text)
if branch_match:
branch_text = branch_match.group(1)
# Only treat single characters/numbers as branches (A, B, 1, 2, etc.)
if len(branch_text) == 1 and branch_text.isalnum():
branch = branch_text
# Extract destination (everything after "to" until the end or next punctuation)
destination = None
destination_match = re.search(r'to\s+([^.]+)', full_text)
if destination_match:
destination = destination_match.group(1).strip()
routes.append({
'name': route_text,
'branch': branch,
'destination': destination
})
pattern = re.compile(r'^\d{1,3}_\d{3,5}_[1-4]$')
vehicles = []
for div_tag in soup.find_all('div', class_='divp', id=pattern):
vehicle_id = div_tag.get('id')
id_parts = vehicle_id.split('_')
route = id_parts[0] # Extract first 1-3 digits as route
vehicle_data = {
'route': route,
}
# Get both timedisp times
timedisp_elements = div_tag.find_all('time', class_='timedisp')
if len(timedisp_elements) >= 2:
vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
vehicle_data['scheduled'] = timedisp_elements[1].get_text(strip=True)
elif len(timedisp_elements) == 1:
vehicle_data['actual'] = timedisp_elements[0].get_text(strip=True)
vehicle_data['scheduled'] = None
else:
vehicle_data['actual'] = None
vehicle_data['scheduled'] = None
# If actual is null, replace with current time (bus is at stop)
if vehicle_data['actual'] is None:
# Get current time in EST/EDT timezone
est_tz = pytz.timezone('America/New_York')
current_time = datetime.now(est_tz)
vehicle_data['actual'] = current_time.strftime("%I:%M:%S%p")
# Get delay/ahead status (like "1:10 ahead")
delay_span = None
for span in div_tag.find_all('span', style=True):
if 'color: light-dark' in span['style']:
span_text = span.get_text(strip=True)
if 'ahead' in span_text or 'behind' in span_text:
delay_span = span
break
if delay_span:
delay_text = delay_span.get_text(strip=True)
if 'ahead' in delay_text:
vehicle_data['delay'] = "+" + delay_text.replace(' ahead', '')
elif 'behind' in delay_text:
vehicle_data['delay'] = "-" + delay_text.replace(' behind', '')
else:
vehicle_data['delay'] = delay_text
else:
vehicle_data['delay'] = "0"
# Get vehicle number from #MapMain link or plain text
vehicle_link = div_tag.find('a', href="#MapMain")
if vehicle_link:
vehicle_data['vehicle_number'] = vehicle_link.get_text(strip=True)
else:
# Look for vehicle number in plain text like "Vehicle 1243 Load"
text = div_tag.get_text()
match = re.search(r'Vehicle\s+(\d+)', text)
if match:
vehicle_data['vehicle_number'] = match.group(1)
else:
vehicle_data['vehicle_number'] = None
if vehicle_data['vehicle_number'] is None: continue
vehicle_data['direction'], vehicle_data['destination'] = getVehicleInfo(vehicle_data['vehicle_number'])
vehicles.append(vehicle_data)
return jsonify({
'stop': stop_id,
'routes': routes,
'vehicles': vehicles
})
@app.route('/', methods=['GET'])
def health_check():
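    """Simple health check used to verify the backend is up."""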
return 'Backend is running!', 200
if __name__ == "__main__":
port = int(os.environ.get("PORT", 7860))
app.run(host="0.0.0.0", port=port, debug=False)
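
# Example requests once the server is running (default port 7860; the stop id below is a placeholder):
#   curl http://localhost:7860/routelist
#   curl http://localhost:7860/alerts
#   curl -X POST -H "Content-Type: application/json" -d '{"route": "26"}' http://localhost:7860/listvehiclesbyroute
#   curl -X POST -H "Content-Type: application/json" -d '{"stop": "1234"}' http://localhost:7860/seek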