# StopSeeker / app.py
# 42Cummer's picture
# Update app.py
# a3b664f verified
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import easyocr
import re
import requests
from bs4 import BeautifulSoup
import tempfile
import os
# Flask application object; CORS(app) applies flask-cors with its default settings.
app = Flask(__name__)
CORS(app)

# EasyOCR is pointed at a directory under /tmp for both its stock models and
# any user-supplied networks; the directory is created up front if missing.
_EASYOCR_DIR = '/tmp/.EasyOCR'
os.makedirs(_EASYOCR_DIR, exist_ok=True)

# Shared OCR reader: English only, CPU only, allowed to download models.
reader = easyocr.Reader(
    ['en'],
    gpu=False,
    download_enabled=True,
    model_storage_directory=_EASYOCR_DIR,
    user_network_directory=_EASYOCR_DIR,
)
def get_vehicle_locations_and_occupancy(vehicle_numbers):
    """Look up position and occupancy for the given vehicles in the TTC feed.

    Downloads the text returned by the ``vehicles?debug`` endpoint, splits it
    on ``'entity {'`` and scans each chunk with regular expressions.

    Args:
        vehicle_numbers: Iterable of vehicle IDs; each is converted with
            ``str()`` before being compared with IDs found in the feed.

    Returns:
        A list with one dict per matching entity, holding ``vehicle_id``,
        ``latitude``, ``longitude`` and ``occupancy_status`` (each of the last
        three is ``None`` when the field is absent from the entity).

        If the HTTP request fails, a ``(jsonify(...), 500)`` tuple is returned
        instead of a list, so callers must check for that shape.
    """
    feed_url = "https://bustime.ttc.ca/gtfsrt/vehicles?debug"
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        response = requests.get(feed_url, headers=request_headers, timeout=10)
        response.raise_for_status()
    except Exception as exc:
        return jsonify({"error": f"Failed to fetch vehicle data: {exc}"}), 500

    # IDs are compared as strings because the feed text yields string matches.
    wanted = {str(number) for number in vehicle_numbers}

    # The text before the first 'entity {' is the feed header, not a vehicle.
    chunks = response.text.split('entity {')[1:]

    found = []
    for chunk in chunks:
        ids = re.findall(r'vehicle\s*\{\s*id: "(\d+)"', chunk)
        # When a chunk contains several matches, the last one is the ID used.
        if not ids or ids[-1] not in wanted:
            continue

        lat = re.search(r'latitude: (-?[\d\.]+)', chunk)
        lon = re.search(r'longitude: (-?[\d\.]+)', chunk)
        occupancy = re.search(r'occupancy_status: ([A-Z_]+)', chunk)

        record = {'vehicle_id': ids[-1]}
        record['latitude'] = None if lat is None else float(lat.group(1))
        record['longitude'] = None if lon is None else float(lon.group(1))
        record['occupancy_status'] = None if occupancy is None else occupancy.group(1)
        found.append(record)
    return found
def get_vehicle_destinations(route, stopcall):
    """Scrape destination text for a route/stop pair from TransSee.

    The prediction page is searched for a ``<p>`` whose id starts with
    ``{route}_{stopcall}``. If that paragraph carries a "going ..." text node,
    a single destination string is returned. Otherwise every per-vehicle
    paragraph is scanned and a list of per-vehicle entries is returned, so the
    result always has a ``'destination'`` key.

    Args:
        route: Route identifier; converted with ``str()`` and regex-escaped
            before being used in id patterns.
        stopcall: Stop-call identifier, handled the same way as ``route``.

    Returns:
        ``{'destination': str}`` when a single destination was parsed, or
        ``{'destination': [ {...}, ... ]}`` where each entry may contain
        ``vehicle_number`` and ``destination``.

        If the HTTP request fails, a ``(jsonify(...), 500)`` tuple is returned
        instead.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        # params= lets requests encode the value, so request-supplied input
        # cannot inject extra query parameters.
        response = requests.get(
            'https://www.transsee.ca/predict',
            params={'s': f'ttc.{route}.{stopcall}'},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch vehicle data: {e}"}), 500

    soup = BeautifulSoup(response.text, 'html.parser')

    # Both values originate from request JSON; escape them so they are matched
    # literally inside the id patterns.
    route_rx = re.escape(str(route))
    stop_rx = re.escape(str(stopcall))
    direction_prefix = re.compile(r'^(North|South|East|West)\s+', flags=re.IGNORECASE)

    # Stop-level paragraph: text node 1 holds "going <branch>", node 3 the
    # destination. Only plain text nodes are accepted; tags are skipped.
    header = soup.find('p', id=re.compile(rf'^{route_rx}_{stop_rx}_?\d*$'))
    if header is not None:
        parts = header.contents
        if len(parts) > 1 and isinstance(parts[1], str) and "going" in parts[1]:
            words = parts[1].strip().split(" ")
            branch = words[1] if len(words) > 1 else None
            if len(parts) > 3 and isinstance(parts[3], str):
                destination_text = direction_prefix.sub('', parts[3].strip())
                if branch:
                    return {'destination': branch + " " + destination_text}
                return {'destination': destination_text}

    # Fallback: no usable stop-level destination, so collect one per vehicle.
    entries = []
    vehicle_id_rx = re.compile(rf'^{route_rx}_{stop_rx}\d*_\d*$')
    for p_tag in soup.find_all('p', id=vehicle_id_rx):
        vehicle_data = {}

        # Case 1: the vehicle number is the text of the map link.
        vehicle_link = p_tag.find('a', href="#MapMain")
        if vehicle_link is not None and vehicle_link.text.strip().isdigit():
            vehicle_data['vehicle_number'] = vehicle_link.text.strip()
        else:
            # Case 2: the number appears in plain text like "Vehicle 3417 Load".
            match = re.search(r'Vehicle\s+(\d+)', p_tag.get_text())
            if match:
                vehicle_data['vehicle_number'] = match.group(1)

        # The branch precedes the 'tsfont' span and the destination follows it.
        marker = p_tag.find('span', class_='tsfont')
        if marker is not None:
            after = marker.next_sibling
            before = marker.previous_sibling
            if isinstance(after, str):
                destination_text = direction_prefix.sub('', after.strip())
                if isinstance(before, str):
                    vehicle_data['destination'] = before.strip() + " " + destination_text
                else:
                    vehicle_data['destination'] = destination_text

        entries.append(vehicle_data)
    return {'destination': entries}
def get_vehicle_delay(vehicle_number):
    """Scrape the delay text for one vehicle from TransSee's fleet finder.

    The result page is searched for a ``<p>`` whose id equals the vehicle
    number; inside it, the first ``<span>`` whose inline style contains
    ``'color: light-dark'`` supplies the delay text.

    Args:
        vehicle_number: Vehicle ID, used both as the query value and as the
            paragraph id to look for.

    Returns:
        The delay text, or ``"0"`` when the paragraph exists but has no
        matching span or the span is empty. ``None`` when no paragraph for
        the vehicle is present.

        If the HTTP request fails, a ``(jsonify(...), 500)`` tuple is returned
        instead, so callers must check for that shape.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        # params= keeps the same query string for ordinary IDs while encoding
        # anything unusual in the request-supplied vehicle number.
        response = requests.get(
            "https://www.transsee.ca/fleetfind",
            params={'a': 'ttc', 'findtrack': '1', 'q': vehicle_number, 'Go': 'Go'},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch vehicle data: {e}"}), 500

    soup = BeautifulSoup(response.text, 'html.parser')
    delay_par = soup.find('p', id=f"{vehicle_number}")
    if delay_par is None:
        return None

    for span in delay_par.find_all('span', style=True):
        if 'color: light-dark' in span['style']:
            return span.get_text(strip=True) or "0"

    # No delay span: report "0" instead of failing on an unset variable.
    return "0"
@app.route('/', methods=['GET'])
def health_check():
    """Answer GET / with a fixed plain-text body and HTTP 200."""
    status_message = 'Backend is running!'
    return status_message, 200
@app.route('/upload', methods=['POST'])
def upload():
    """Read a stop ID from an uploaded image and list the routes at that stop.

    Expects a multipart upload with an ``image`` file. The image is written to
    a temporary file, passed to the OCR reader, and the temporary file is
    removed afterwards. The first OCR line that contains ``898882`` and whose
    second whitespace-separated token is all digits supplies the stop ID.

    Returns:
        JSON ``{'stop': ..., 'routes': [...]}`` on success; a JSON error with
        status 400 (no image), 404 (no stop ID recognised) or 500 (the
        TransSee request failed).
    """
    if 'image' not in request.files:
        return jsonify({'error': 'No image file uploaded'}), 400
    image = request.files['image']

    # Reserve a path and close the handle before writing to it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
        image_path = temp_file.name
    try:
        image.save(image_path)
        results = reader.readtext(image_path)
    finally:
        # delete=False means nothing else removes the file; cleanup is
        # best-effort so a failed unlink never masks the real outcome.
        try:
            os.remove(image_path)
        except OSError:
            pass

    stop_id = None
    for _, text, _conf in results:
        # NOTE(review): 898882 is presumably the SMS short code printed next
        # to the stop number on the sign - confirm.
        if "898882" in text:
            parts = text.lower().split()
            if len(parts) >= 2 and parts[1].isdigit():
                stop_id = parts[1]
                break
    if stop_id is None:
        return jsonify({'error': 'Stop ID not found via OCR'}), 404

    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        # A timeout keeps a stalled upstream from blocking the worker.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch stop data: {e}"}), 500

    soup = BeautifulSoup(response.text, 'html.parser')
    routes = [a.text.strip() for a in soup.select('a[href^="stoplist?a=ttc&r="]')]
    return jsonify({
        'stop': stop_id,
        'routes': routes,
    })
# Prediction paragraphs have ids of the form "<route>_<stopcall>_<1-4>".
_PREDICTION_ID_RE = re.compile(r'^\d{2,3}_\d{3,5}_[1-4]$')
# Inner span that holds the countdown value.
_SECS_ID_RE = re.compile(r'SECS\d+')


def _int_at(nodes, index):
    """Return the text node at ``nodes[index]`` as an int, or 0 if unusable."""
    if index >= len(nodes) or not isinstance(nodes[index], str):
        return 0
    text = nodes[index].strip()
    return int(text) if text.isdigit() else 0


def _format_arrival(p_tag):
    """Return "Now" or "<n> minutes" for a prediction paragraph, else None.

    ``None`` means the paragraph has no ``minpred`` span or no countdown span,
    in which case the caller leaves the ``minutes`` key out.
    """
    minpred_span = p_tag.find('span', class_='minpred')
    if minpred_span is None:
        return None
    secs_span = minpred_span.find('span', id=_SECS_ID_RE)
    if secs_span is None:
        return None

    secs_value = secs_span.text.strip()
    if secs_value == "0" or secs_value.startswith("!"):
        return "Now"

    # With an "h" marker span the layout is: hours text, <span>h</span>,
    # minutes text. Without it the whole value is the minute count.
    if secs_span.find('span', string='h'):
        hours_value = _int_at(secs_span.contents, 0)
        minutes_value = _int_at(secs_span.contents, 2)
    else:
        hours_value = 0
        minutes_value = int(secs_value) if secs_value.isdigit() else 0

    total_minutes = minutes_value + (hours_value * 60)
    return f"{total_minutes} minutes"


@app.route('/seek', methods=['POST'])
def seek():
    """Return routes and upcoming vehicles for a stop, scraped from TransSee.

    Expects JSON ``{'stop': <stop id>}``.

    Returns:
        JSON with ``stop``, ``routes``, ``vehicles`` and ``stopcall``. Each
        vehicle has ``id`` (the route part of the paragraph id) and
        ``delay_text`` (``"0"`` when no delay span is present), plus
        ``minutes`` and ``vehicle_number`` when they could be parsed.
        ``stopcall`` is taken from the first prediction paragraph, or is
        ``None`` when there are none.

        Errors are JSON with status 400 (body is not JSON), 404 (no stop ID)
        or 500 (the TransSee request failed).
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    stop_id = data.get('stop') if isinstance(data, dict) else None
    if not stop_id:
        # NOTE(review): 400 would describe this more accurately; 404 is kept
        # so existing clients see the same status.
        return jsonify({'error': 'Stop ID is required'}), 404

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        # stop_id comes from the request body, so requests encodes it.
        response = requests.get(
            "https://www.transsee.ca/smsstop",
            params={'a': 'ttc', 'id': stop_id},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Failed to fetch stop data: {e}"}), 500

    soup = BeautifulSoup(response.text, 'html.parser')
    routes = [a.text.strip() for a in soup.select('a[href^="stoplist?a=ttc&r="]')]

    vehicles = []
    stopcall = None  # stop-call part of the first prediction id
    for p_tag in soup.find_all('p', id=_PREDICTION_ID_RE):
        route, call = p_tag.get('id').split('_')[:2]
        if stopcall is None:
            stopcall = call

        vehicle_data = {'id': route}

        minutes = _format_arrival(p_tag)
        if minutes is not None:
            vehicle_data['minutes'] = minutes

        # Delay text is in the first span whose inline style sets a
        # light-dark colour.
        delay_span = next(
            (span for span in p_tag.find_all('span', style=True)
             if 'color: light-dark' in span['style']),
            None,
        )
        if delay_span is not None:
            vehicle_data['delay_text'] = delay_span.get_text(strip=True)
        else:
            vehicle_data['delay_text'] = "0"

        # Case 1: the vehicle number is the text of the map link.
        vehicle_link = p_tag.find('a', href="#MapMain")
        if vehicle_link is not None and vehicle_link.text.strip().isdigit():
            vehicle_data['vehicle_number'] = vehicle_link.text.strip()
        else:
            # Case 2: the number appears in plain text like "Vehicle 3417 Load".
            match = re.search(r'Vehicle\s+(\d+)', p_tag.get_text())
            if match:
                vehicle_data['vehicle_number'] = match.group(1)

        # NOTE: extraction of a stop-location description ('at' / 'past' /
        # 'aprchg' text) was disabled in the original and is not performed.
        vehicles.append(vehicle_data)

    return jsonify({
        'stop': stop_id,
        'routes': routes,
        'vehicles': vehicles,
        'stopcall': stopcall,
    })
@app.route('/vehicles', methods=['POST'])
def vehicle_status():
    """Return live location and occupancy for a list of vehicle numbers.

    Expects JSON ``{'vehicle_numbers': [...]}`` with a non-empty list.

    Returns:
        JSON ``{'vehicles': [...]}`` on success, a JSON error with status 400
        for a non-JSON body or a missing/empty/non-list ``vehicle_numbers``,
        or the lookup helper's own error response when the feed request fails.
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    vehicle_numbers = data.get('vehicle_numbers')
    if not vehicle_numbers or not isinstance(vehicle_numbers, list):
        return jsonify({'error': 'vehicle_numbers must be a list'}), 400

    info = get_vehicle_locations_and_occupancy(vehicle_numbers)
    # On failure the helper returns a ready (response, status) tuple; return
    # it as-is because it cannot be serialised by jsonify.
    if isinstance(info, tuple):
        return info
    return jsonify({'vehicles': info})
@app.route('/destinations', methods=['POST'])
def destinations():
    """Return destination information for a route at a stop call.

    Expects JSON with ``stopCall`` and ``route``; each missing or empty field
    yields a 400 JSON error, with ``stopCall`` checked first. The value
    produced by ``get_vehicle_destinations`` is returned unchanged.
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    payload = request.get_json()
    stop_call = payload.get('stopCall')
    route_id = payload.get('route')

    required = (
        (stop_call, 'stopCall is required'),
        (route_id, 'route is required'),
    )
    for value, message in required:
        if not value:
            return jsonify({'error': message}), 400

    return get_vehicle_destinations(route_id, stop_call)
@app.route('/vehicleinfo', methods=['POST'])
def vehicleinfo():
    """Return live location and delay text for a single vehicle.

    Expects JSON ``{'vehicle_number': ...}``.

    Returns:
        JSON ``{'location': {...}, 'delay': ...}`` on success; a JSON error
        with status 400 (non-JSON body or missing ``vehicle_number``) or 404
        (the vehicle is not in the live feed); or a helper's own error
        response when one of the upstream requests fails.
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    vehicle_number = data.get('vehicle_number')
    if not vehicle_number:
        return jsonify({'error': 'vehicle_number is required'}), 400

    locations = get_vehicle_locations_and_occupancy([vehicle_number])
    # On failure the helpers return a ready (response, status) tuple; return
    # it as-is instead of indexing or serialising it.
    if isinstance(locations, tuple):
        return locations
    if not locations:
        # An empty list means the feed has no entity for this vehicle.
        return jsonify({'error': 'Vehicle not found in live feed'}), 404

    delay = get_vehicle_delay(vehicle_number)
    if isinstance(delay, tuple):
        return delay

    return jsonify({
        'location': locations[0],
        'delay': delay,
    })
#@app.route('/beta_testing')
#def beta_testing():
# location = get_vehicle_locations_and_occupancy(['3166'])[0]
# delay = get_vehicle_delay('3166')
# return jsonify({
# 'location': location,
# 'delay': delay
# })
if __name__ == '__main__':
    # Direct execution starts Flask's built-in server on port 5000 with
    # debug mode enabled.
    # NOTE(review): debug mode exposes the interactive debugger; confirm this
    # entry point is used for local development only.
    app.run(port=5000, debug=True)