Spaces:

42Cummer
/

TransseeAPI

Sleeping

File size: 20,522 Bytes

from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import re
import requests
from bs4 import BeautifulSoup
import tempfile
import os
from playwright.sync_api import sync_playwright
from datetime import datetime
import pytz

# Point SE_CACHE_PATH at a writable temp directory.
# NOTE(review): this looks like the Selenium cache-path variable, but nothing
# in the visible code uses Selenium — confirm whether it is still needed.
os.environ["SE_CACHE_PATH"] = "/tmp/selenium"

# Flask application object that the route decorators in this file register on.
app = Flask(__name__)
# Wrap the application with flask_cors (no extra options are passed).
CORS(app)

import re
import requests
from bs4 import BeautifulSoup
from flask import jsonify

# Route-id ranges scanned on the TransSee route list page, as half-open
# ``range`` objects: conventional, blue night, streetcar and express routes.
# NOTE(review): the upper bounds are exclusive, so ids 399, 599 and 999 are
# not scanned; this matches the previous behaviour.
_ROUTE_ID_RANGES = (
    range(7, 204),
    range(300, 399),
    range(500, 599),
    range(900, 999),
)


def _clean_route_name(raw_text):
    """Normalise the text of one route entry (e.g. ``"*26-Dupont"`` -> ``"26 Dupont"``)."""
    text = re.sub(r"^[^A-Za-z0-9]+", "", raw_text)   # drop leading symbols such as "*"
    text = re.sub(r"\s*==.*$", "", text)             # drop a trailing "== $0"-style suffix
    text = re.sub(r"\s+", " ", text).strip()         # collapse runs of whitespace
    # Replace the dash between the route number and its name with a space.
    return re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)


@app.route('/routelist', methods=['GET'])
def getRouteList():
    """Return the TTC route list scraped from TransSee as JSON.

    Fetches the TransSee route list page, looks up the ``<p id="N">`` element
    for every id in ``_ROUTE_ID_RANGES`` and maps the numeric id to the
    cleaned route name.

    Returns:
        A JSON object ``{route_id: "number name"}`` on success, or
        ``({"error": ...}, 500)`` when the page cannot be fetched.
    """
    url = "https://www.transsee.ca/routelist?a=ttc&ShowAll=1"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch route list: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")

    # Index the paragraphs by id once instead of re-scanning the whole
    # document for every candidate id.  ``setdefault`` keeps the first
    # element per id, which is what ``soup.find`` would have returned.
    first_tag_by_id = {}
    for tag in soup.find_all("p", id=True):
        first_tag_by_id.setdefault(tag["id"], tag)

    routes = {}
    for id_range in _ROUTE_ID_RANGES:
        for rid in id_range:
            tag = first_tag_by_id.get(str(rid))
            # Skip ids that are absent from the page or have no visible text.
            if tag is None or not tag.text.strip():
                continue
            routes[rid] = _clean_route_name(tag.get_text(strip=True))
    return jsonify(routes)

def _format_route_delay(delay_text):
    """Convert TransSee delay text (e.g. ``"1:02:30 behind"``) to a signed ``mm:ss`` string.

    The result is prefixed with ``"-"`` when the text ends in "behind" and
    with ``"+"`` otherwise.  An ``hh:mm:ss`` value is folded into ``mm:ss``;
    any other value is passed through unchanged.
    """
    words = delay_text.split(' ')
    late = words[-1].lower() == 'behind'
    delay_value = words[0]

    parts = delay_value.split(":")
    if len(parts) == 3:  # hh:mm:ss -> mm:ss by adding the hours to the minutes
        hours, minutes, seconds = parts
        try:
            delay_value = f"{int(hours) * 60 + int(minutes)}:{seconds}"
        except ValueError:
            # Non-numeric fields: keep the value exactly as scraped.
            pass

    return ("-" if late else "+") + delay_value


def _find_marker_coords(marker_calls, vehicle_id):
    """Return ``(lat, lon)`` from the first ``AddMarker`` call that mentions *vehicle_id*.

    *marker_calls* are the text fragments that follow each ``AddMarker(`` in
    the page scripts.  Returns ``(None, None)`` when no fragment contains the
    quoted vehicle id together with a parsable ``[lat, lon]`` pair.
    """
    if vehicle_id is None:
        return None, None
    needle = f'"{vehicle_id}"'
    for call in marker_calls:
        if needle not in call:
            continue
        coords_match = re.search(r'\[([0-9\.\-]+),\s*([0-9\.\-]+)\]', call)
        if not coords_match:
            continue
        try:
            return float(coords_match.group(1)), float(coords_match.group(2))
        except ValueError:
            # The character class also admits strings like "1.2.3"; skip them.
            continue
    return None, None


@app.route('/listvehiclesbyroute', methods=['POST'])
def listVehiclesByRoute():
    """List the vehicles currently reported on a route, scraped from TransSee.

    Expects a JSON body ``{"route": <route id>}``.

    Returns:
        ``{"route": ..., "vehicles": [...]}`` where every vehicle carries
        ``vehicle_id``, ``delay`` (signed ``mm:ss``), ``time``, ``branch``,
        ``destination``, ``latitude`` and ``longitude``.  Responds with 400
        when the route is missing, 500 when the page cannot be fetched and
        404 when the page has no ``div.core`` element.
    """
    data = request.get_json()
    if not isinstance(data, dict) or "route" not in data:
        return jsonify({"error": "Missing route parameter"}), 400
    route = data["route"]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        # Pass the route through ``params`` so client-supplied text is
        # URL-encoded instead of being spliced into the query string.
        response = requests.get(
            "https://www.transsee.ca/routeveh",
            params={"a": "ttc", "r": route},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch vehicle list: {e}"}), 500

    soup = BeautifulSoup(response.text, "html.parser")
    core_div = soup.find("div", class_="core")
    if not core_div:
        return jsonify({"error": "No vehicles found"}), 404

    # Collect the AddMarker(...) fragments once; they are the same for every
    # vehicle, so there is no need to re-scan the scripts inside the loop.
    marker_calls = [
        call
        for script in soup.find_all("script")
        if script.string and "AddMarker" in script.string
        for call in script.string.split("AddMarker(")[1:]  # [0] precedes the first call
    ]

    vehicles = []
    for p in core_div.find_all("p", id=True):
        link = p.find("a")
        vehicle_id = link.get_text(strip=True) if link is not None else None

        # The first span is the delay, unless it is a "waiting" marker, in
        # which case the delay is in the second span.
        spans = p.find_all("span")
        delay_text = None
        if spans:
            first_text = spans[0].get_text(strip=True)
            if "waiting" in first_text.lower() and len(spans) > 1:
                delay_text = spans[1].get_text(strip=True)
            else:
                delay_text = first_text
        if delay_text is None:
            continue  # entries without any delay information are skipped

        time_tag = p.find("time")
        reported_time = time_tag.get_text(strip=True) if time_tag is not None else None

        direction = None
        destination = None
        for part in p.stripped_strings:
            if part.startswith("going"):
                direction = part
            elif "to " in part:  # destination line
                destination = part

        lat, lon = _find_marker_coords(marker_calls, vehicle_id)

        # The branch is the last word of the "going ..." text; a lone
        # "going" yields an empty branch, no direction at all yields None.
        if direction is not None:
            direction_words = direction.split(' ')
            branch = direction_words[-1] if len(direction_words) > 1 else ""
        else:
            branch = None

        vehicles.append({
            "vehicle_id": vehicle_id,
            "delay": _format_route_delay(delay_text),
            "time": reported_time,
            "branch": branch,
            "destination": destination,
            "latitude": lat,
            "longitude": lon
        })

    return jsonify({
        "route": route,
        "vehicles": vehicles
    })

@app.route("/alerts", methods=["GET"])
def serviceAlerts():
    """Return the high-priority TTC service alerts scraped from TransSee.

    Each ``section.HighAlert`` element on the messages page becomes one
    ``{"title": ..., "route": ...}`` object, taken from its first ``<b>`` and
    first ``<div>`` respectively (empty string when the element is absent).
    Responds with ``({"error": ...}, 500)`` when the page cannot be fetched.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }

    try:
        page = requests.get(
            "https://www.transsee.ca/showmessages?a=ttc",
            headers=request_headers,
            timeout=10,
        )
        page.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch service alerts: {e}"}), 500

    document = BeautifulSoup(page.text, "html.parser")

    def text_or_blank(node):
        # Missing elements are reported as an empty string.
        if node:
            return node.get_text(strip=True)
        return ""

    return jsonify([
        {
            "title": text_or_blank(section.find("b")),
            "route": text_or_blank(section.find("div")),
        }
        for section in document.select("section.HighAlert")
    ])


def getVehicleInfo(vehicle_id):
    """Look up a vehicle on TransSee and return its ``(direction, destination)``.

    Args:
        vehicle_id: Fleet number of the vehicle; it is also the ``id`` of the
            ``<p>`` element that describes the vehicle on the result page.

    Returns:
        A 2-tuple ``(direction, destination)``.  ``direction`` is the token
        after "going" and ``destination`` is the "<compass> to <place>" text
        with any trailing "at <location>" detail removed.  Either element is
        ``None`` when it cannot be found, and ``(None, None)`` is returned
        when the page cannot be fetched or does not list the vehicle, so the
        result can always be unpacked and serialised by the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        # ``params`` URL-encodes the vehicle id rather than splicing it in.
        response = requests.get(
            'https://www.transsee.ca/fleetfind',
            params={'a': 'ttc', 'findtrack': '1', 'q': vehicle_id, 'Go': 'Go'},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        # This is a helper, not a route: report "unknown" instead of a Flask
        # response so callers that unpack the result keep working.
        print(f"Failed to fetch vehicle info for {vehicle_id}: {e}")
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the specific paragraph with id=vehicle_id
    p = soup.find("p", id=str(vehicle_id))
    if not p:
        return None, None

    # Work on the raw HTML so the <br> boundary is still visible.
    html_content = str(p)

    # Extract direction (like "going D")
    direction = None
    direction_match = re.search(r'going\s+([A-Za-z0-9]+)', html_content)
    if direction_match:
        direction = direction_match.group(1)

    # Only the markup before the first <br> is searched for the destination.
    before_br = html_content.split('<br/>')[0].split('<br>')[0]

    # Prefer a quoted string that contains a compass word.
    raw_dest = None
    dest_match = re.search(r'"([^"]*(?:South|North|East|West)[^"]*)"', before_br)
    if dest_match:
        raw_dest = dest_match.group(1)
    else:
        # Fallback: search the visible text for "<compass> to ...".
        text_content = BeautifulSoup(before_br, 'html.parser').get_text()
        dest_match = re.search(r'((?:South|North|East|West) to [^=]*?)(?=\s*==|\s*$)', text_content)
        if dest_match:
            raw_dest = dest_match.group(1)

    destination = None
    if raw_dest is not None:
        # Drop trailing "at <location>" detail.  The word boundary keeps
        # names that merely contain "at " (e.g. "Flat Rd") intact.
        destination = re.sub(r'\bat\s+.*$', '', raw_dest.strip()).strip()

    return direction, destination


def _find_chromium_executable():
    """Return the first Chromium binary found in the known install locations, or None."""
    import glob

    possible_locations = [
        "/root/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
        "/home/user/.cache/ms-playwright/chromium-*/chrome-linux/chrome",
        "/tmp/playwright-browsers/chromium-*/chrome-linux/chrome",
        "/usr/lib/chromium-browser/chromium-browser",
        "/usr/bin/chromium-browser"
    ]
    for pattern in possible_locations:
        if "*" in pattern:
            matches = glob.glob(pattern)
            if matches:
                return matches[0]
        elif os.path.exists(pattern):
            return pattern
    return None


def _fetch_rendered_html(url):
    """Open *url* in headless Chromium and return the rendered page HTML.

    Raises:
        RuntimeError: when no Chromium binary is found in the known locations
            and Playwright cannot launch its own either.
        Exception: whatever Playwright raises while navigating or reading
            the page; the browser is closed before it propagates.
    """
    with sync_playwright() as p:
        executable_path = _find_chromium_executable()
        if executable_path:
            print(f"Using Chrome executable: {executable_path}")
            browser = p.chromium.launch(headless=True, executable_path=executable_path)
        else:
            # Let Playwright try its own default browser location.
            print("Chrome not found in known locations; letting Playwright locate it")
            try:
                browser = p.chromium.launch(headless=True)
            except Exception as e:
                raise RuntimeError(
                    f"Chrome not found anywhere and Playwright can't find it: {e}"
                ) from e

        try:
            page = browser.new_page()
            page.goto(url)
            try:
                page.wait_for_selector("div.divp", timeout=15000)
            except Exception:
                # Best effort: a stop without predictions never shows a
                # div.divp, so parse whatever has been rendered so far.
                pass
            return page.content()
        finally:
            browser.close()


def _parse_stop_routes(soup):
    """Extract ``{'name', 'branch', 'destination'}`` for every route paragraph of a stop page."""
    routes = []
    for p_tag in soup.find_all('p', id=re.compile(r'^\d+_\d+$')):
        b_tag = p_tag.find('b')
        if b_tag is None:
            continue
        route_link = b_tag.find('a', href=re.compile(r'stoplist\?a=ttc&r=\d+'))
        if route_link is None:
            continue

        # Remove dash between number and name (e.g., "133-Neilson" -> "133 Neilson")
        route_text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', route_link.get_text(strip=True))

        full_text = p_tag.get_text()

        # Only a single character after "going" (A, B, 1, 2, ...) is a branch.
        branch = None
        branch_match = re.search(r'going\s+([A-Za-z0-9]+)', full_text)
        if branch_match and len(branch_match.group(1)) == 1:
            branch = branch_match.group(1)

        # Everything after the word "to" up to the next full stop.  The word
        # boundary stops names such as "Toronto Zoo" from matching.
        destination = None
        destination_match = re.search(r'\bto\s+([^.]+)', full_text)
        if destination_match:
            destination = destination_match.group(1).strip()

        routes.append({
            'name': route_text,
            'branch': branch,
            'destination': destination
        })
    return routes


def _parse_stop_vehicle(div_tag):
    """Parse one ``div.divp`` prediction block; return None when it names no vehicle."""
    vehicle_data = {
        # The div id looks like "<route>_<digits>_<1-4>"; the first field is the route.
        'route': div_tag.get('id').split('_')[0],
    }

    # The first timedisp is the predicted time, the second the scheduled one.
    times = [t.get_text(strip=True) for t in div_tag.find_all('time', class_='timedisp')]
    vehicle_data['actual'] = times[0] if times else None
    vehicle_data['scheduled'] = times[1] if len(times) >= 2 else None

    # No predicted time: use the current time (the bus is at the stop).
    if vehicle_data['actual'] is None:
        current_time = datetime.now(pytz.timezone('America/New_York'))
        vehicle_data['actual'] = current_time.strftime("%I:%M:%S%p")

    # Delay text such as "1:10 ahead" -> "+1:10", "0:45 behind" -> "-0:45";
    # "0" when the block carries no such span.
    vehicle_data['delay'] = "0"
    for span in div_tag.find_all('span', style=True):
        if 'color: light-dark' not in span['style']:
            continue
        span_text = span.get_text(strip=True)
        if 'ahead' in span_text:
            vehicle_data['delay'] = "+" + span_text.replace(' ahead', '')
            break
        if 'behind' in span_text:
            vehicle_data['delay'] = "-" + span_text.replace(' behind', '')
            break

    # Vehicle number: the #MapMain link text, else "Vehicle 1243" in the text.
    vehicle_link = div_tag.find('a', href="#MapMain")
    if vehicle_link is not None:
        vehicle_data['vehicle_number'] = vehicle_link.get_text(strip=True)
    else:
        match = re.search(r'Vehicle\s+(\d+)', div_tag.get_text())
        vehicle_data['vehicle_number'] = match.group(1) if match else None

    if vehicle_data['vehicle_number'] is None:
        return None
    return vehicle_data


@app.route('/seek', methods=['POST'])
def seek():
    """Return the routes and predicted vehicles for a stop, scraped from TransSee.

    Expects a JSON body ``{"stop": <stop id>}``.  The stop page is rendered
    with headless Chromium, then parsed for the routes serving the stop and
    for the predicted vehicles; each vehicle is enriched with the direction
    and destination returned by ``getVehicleInfo``.

    Returns:
        ``{"stop": ..., "routes": [...], "vehicles": [...]}``.  Responds with
        400 when the request is not JSON, 404 when the stop id is missing
        and 500 when the page cannot be rendered.
    """
    from urllib.parse import urlencode

    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    data = request.get_json()
    # A JSON body that is not an object cannot carry a stop id.
    stop_id = data.get('stop') if isinstance(data, dict) else None

    if not stop_id:
        # NOTE(review): 400 would describe a missing parameter better; 404 is
        # kept so existing clients see the same status code.
        return jsonify({'error': 'Stop ID is required'}), 404

    # urlencode escapes the client-supplied stop id.
    url = "https://www.transsee.ca/smsstop?" + urlencode({"a": "ttc", "id": stop_id})

    try:
        html = _fetch_rendered_html(url)
    except Exception as e:
        # Same JSON error shape as the other endpoints instead of a bare 500.
        return jsonify({'error': f'Failed to load stop page: {e}'}), 500

    soup = BeautifulSoup(html, 'html.parser')
    routes = _parse_stop_routes(soup)

    vehicles = []
    prediction_id = re.compile(r'^\d{1,3}_\d{3,5}_[1-4]$')
    for div_tag in soup.find_all('div', class_='divp', id=prediction_id):
        vehicle_data = _parse_stop_vehicle(div_tag)
        if vehicle_data is None:
            continue

        direction, destination = getVehicleInfo(vehicle_data['vehicle_number'])
        # Keep the payload JSON-serialisable: anything other than text is
        # treated as "unknown".
        if direction is not None and not isinstance(direction, str):
            direction = None
        if destination is not None and not isinstance(destination, str):
            destination = None
        vehicle_data['direction'] = direction
        vehicle_data['destination'] = destination
        vehicles.append(vehicle_data)

    return jsonify({
        'stop': stop_id,
        'routes': routes,
        'vehicles': vehicles
    })

@app.route('/', methods=['GET'])
def health_check():
    """Liveness probe: answer with a fixed plain-text message and HTTP 200."""
    body, status = 'Backend is running!', 200
    return body, status

if __name__ == "__main__":
    # Serve on every interface; the port is read from $PORT and falls back
    # to 7860 when the variable is not set.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(debug=False, port=listen_port, host="0.0.0.0")