# NOTE: "Spaces: / Paused / Paused" banner removed — Hugging Face Spaces UI
# residue captured during extraction, not part of the source code.
| from flask import Flask, render_template, request, jsonify | |
| from flask_cors import CORS | |
| import easyocr | |
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import tempfile | |
| import os | |
# Flask application with CORS enabled so a browser front-end served from a
# different origin can call these endpoints.
app = Flask(__name__)
CORS(app)

# EasyOCR downloads/loads its detection models from disk; point it at /tmp
# because the rest of the filesystem is typically read-only in this
# deployment (e.g. a Hugging Face Space) — TODO confirm deployment target.
os.makedirs('/tmp/.EasyOCR', exist_ok=True)
reader = easyocr.Reader(
    ['en'],
    gpu=False,
    download_enabled=True,
    model_storage_directory='/tmp/.EasyOCR',
    user_network_directory='/tmp/.EasyOCR'  # ✅ Also tell it where user models are
)
def get_vehicle_locations_and_occupancy(vehicle_numbers):
    """Return live location and occupancy records for the given vehicles.

    Fetches the TTC GTFS-realtime vehicle feed in its debug text form and
    extracts latitude, longitude and occupancy status for each requested
    vehicle id.

    Args:
        vehicle_numbers: iterable of vehicle ids (ints or strings).

    Returns:
        A list of dicts with keys 'vehicle_id', 'latitude', 'longitude',
        'occupancy_status' (missing fields are None). On an upstream
        failure, returns a ``(jsonify(...), 500)`` tuple instead.
    """
    feed_url = "https://bustime.ttc.ca/gtfsrt/vehicles?debug"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        feed_response = requests.get(feed_url, headers=headers, timeout=10)
        feed_response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch vehicle data: {e}"}), 500

    # Normalize requested ids to strings once, for O(1) membership tests.
    wanted_ids = {str(num) for num in vehicle_numbers}

    # Compile the field patterns once instead of re-scanning per entity.
    id_pattern = re.compile(r'vehicle\s*\{\s*id: "(\d+)"')
    lat_pattern = re.compile(r'latitude: (-?[\d\.]+)')
    lon_pattern = re.compile(r'longitude: (-?[\d\.]+)')
    occ_pattern = re.compile(r'occupancy_status: ([A-Z_]+)')

    matched = []
    # The debug feed is plain text; each record starts with "entity {".
    # Skip the chunk before the first record (the feed header).
    for chunk in feed_response.text.split('entity {')[1:]:
        ids_in_chunk = id_pattern.findall(chunk)
        if not ids_in_chunk:
            continue
        vehicle_id = ids_in_chunk[-1]  # last id wins, as in the feed layout
        if vehicle_id not in wanted_ids:
            continue
        lat = lat_pattern.search(chunk)
        lon = lon_pattern.search(chunk)
        occ = occ_pattern.search(chunk)
        matched.append({
            'vehicle_id': vehicle_id,
            'latitude': float(lat.group(1)) if lat else None,
            'longitude': float(lon.group(1)) if lon else None,
            'occupancy_status': occ.group(1) if occ else None,
        })
    return matched
def get_vehicle_destinations(route, stopcall):
    """Scrape TransSee's prediction page for a route/stop and extract
    destination information.

    Args:
        route: TTC route number (string or int).
        stopcall: stop-call id as it appears in TransSee element ids.

    Returns:
        A dict whose 'destination' key is either the header destination
        string for the stop, or (when no header destination is found) a
        list of per-vehicle dicts with 'vehicle_number'/'destination'.
        On an upstream failure, returns a ``(jsonify(...), 500)`` tuple.
    """
    url = f'https://www.transsee.ca/predict?s=ttc.{route}.{stopcall}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch vehicle data: {e}"}), 500

    soup = BeautifulSoup(response.text, 'html.parser')
    vehicle_info = {}

    # Header <p> id looks like "<route>_<stopcall>" or "<route>_<stopcall>_<n>".
    pattern = re.compile(rf'^{route}_{stopcall}_?\d*$')
    dest_par = soup.find('p', id=pattern)
    if dest_par:
        contents = dest_par.contents
        if len(contents) > 1 and "going" in contents[1]:
            stuff = contents[1].strip().split(" ")
            branch = stuff[1] if len(stuff) > 1 else None
            # BUG FIX: guard contents[3]; original indexed it unconditionally
            # and raised IndexError on short paragraphs.
            destination_text = contents[3].strip() if len(contents) > 3 else ""
            # Remove directional prefixes
            destination_text = re.sub(r'^(North|South|East|West)\s+', '', destination_text, flags=re.IGNORECASE)
            if branch:
                vehicle_info['destination'] = branch + " " + destination_text
            else:
                vehicle_info['destination'] = destination_text
        else:
            vehicle_info['destination'] = []
    else:
        # BUG FIX: original left 'destination' unset when the header <p> was
        # missing, then raised KeyError on the append below.
        vehicle_info['destination'] = []

    # Per-vehicle <p> ids look like "<route>_<stopcall><n>_<m>".
    new_pattern = re.compile(rf'^{route}_{stopcall}\d*_\d*$')
    for p_tag in soup.find_all('p', id=new_pattern):
        vehicle_data = {}
        vehicle_link = p_tag.find('a', href="#MapMain")
        if vehicle_link and vehicle_link.text.strip().isdigit():
            vehicle_data['vehicle_number'] = vehicle_link.text.strip()
        else:
            # Case 2: Vehicle number is in plain text like "Vehicle 3417 Load"
            match = re.search(r'Vehicle\s+(\d+)', p_tag.get_text())
            if match:
                vehicle_data['vehicle_number'] = match.group(1)
        span_element = p_tag.find('span', class_='tsfont')
        # BUG FIX: next_sibling/previous_sibling can be None; original called
        # .strip() on them unconditionally (AttributeError).
        if span_element and span_element.next_sibling is not None and span_element.previous_sibling is not None:
            destination_text = span_element.next_sibling.strip()
            branch = span_element.previous_sibling.strip()
            # Remove directional prefixes
            destination_text = re.sub(r'^(North|South|East|West)\s+', '', destination_text, flags=re.IGNORECASE)
            vehicle_data['destination'] = branch + " " + destination_text
        # BUG FIX: only append when 'destination' holds the list form; the
        # original called .append() even when it held the header destination
        # string, raising AttributeError.
        if isinstance(vehicle_info['destination'], list):
            vehicle_info['destination'].append(vehicle_data)
    return vehicle_info
def get_vehicle_delay(vehicle_number):
    """Scrape TransSee's fleet-finder page for one vehicle's delay text.

    Args:
        vehicle_number: the TTC fleet number to look up.

    Returns:
        The delay string shown on the page, "0" when no delay text is
        present, or None when the vehicle's paragraph is not on the page.
        On an upstream failure, returns a ``(jsonify(...), 500)`` tuple.
    """
    url = f"https://www.transsee.ca/fleetfind?a=ttc&findtrack=1&q={vehicle_number}&Go=Go"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return jsonify({"error": f"Failed to fetch vehicle data: {e}"}), 500

    soup = BeautifulSoup(response.text, 'html.parser')
    delay_par = soup.find('p', id=f"{vehicle_number}")
    if delay_par:
        # BUG FIX: delay_span must be initialized before the loop; the
        # original raised NameError when no styled span matched.
        delay_span = None
        for span in delay_par.find_all('span', style=True):
            # The delay text is rendered in a span styled with light-dark().
            if 'color: light-dark' in span['style']:
                delay_span = span
                break
        if delay_span:
            delay_text = delay_span.get_text(strip=True)
            return delay_text if delay_text else "0"
        # Paragraph present but no delay span — treat as no delay, matching
        # the "0" convention used by seek().
        return "0"
    # Vehicle not found on the page (preserves the original implicit None).
    return None
def health_check():
    """Liveness probe: confirm the backend process is serving requests."""
    status_message = 'Backend is running!'
    return status_message, 200
# NOTE(review): this reads request.files like a Flask view, but no
# @app.route decorator is visible here — confirm it was not lost in transit.
def upload():
    """Accept an uploaded photo, OCR it to find a stop id, and return the
    routes serving that stop.

    Expects a multipart file under the 'image' key. Responds with JSON
    {'stop': <id>, 'routes': [...]} or an error payload with 400/404.
    """
    if 'image' not in request.files:
        return jsonify({'error': 'No image file uploaded'}), 400
    image = request.files['image']

    # Persist the upload to a real file because easyocr reads from a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
        image_path = temp_file.name
        image.save(image_path)
    try:
        results = reader.readtext(image_path)
    finally:
        # BUG FIX: the temp file was created with delete=False and never
        # removed, leaking one file per request.
        os.remove(image_path)

    stop_id = None
    for _, text, conf in results:
        # "898882" is presumably the SMS shortcode printed on the stop sign,
        # followed by the stop id — TODO confirm against real sign photos.
        if "898882" in text:
            parts = text.lower().split()
            if len(parts) >= 2 and parts[1].isdigit():
                stop_id = parts[1]
                break
    if stop_id is None:
        return jsonify({'error': 'Stop ID not found via OCR'}), 404

    # BUG FIX: original had a doubled assignment ("url = url = ...").
    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    # Timeout added for consistency with the other scrapers in this module.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    routes = [a.text.strip() for a in soup.select('a[href^="stoplist?a=ttc&r="]')]
    return jsonify({
        'stop': stop_id,
        'routes': routes
    })
# NOTE(review): this reads request JSON like a Flask view, but no
# @app.route decorator is visible here — confirm it was not lost in transit.
def seek():
    """Given JSON {'stop': <id>}, scrape TransSee's SMS-stop page and return
    the routes serving the stop plus per-vehicle arrival predictions.

    Response JSON keys: 'stop', 'routes', 'vehicles', 'stopcall'. Each
    vehicle dict carries 'id' (the route number), optionally 'minutes' and
    'vehicle_number', and always 'delay_text' ("0" when no delay shown).
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    stop_id = data.get('stop')
    if not stop_id:
        return jsonify({'error': 'Stop ID is required'}), 404

    # BUG FIX: original had a doubled assignment ("url = url = ...").
    url = f"https://www.transsee.ca/smsstop?a=ttc&id={stop_id}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    # Timeout added for consistency with the other scrapers in this module.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    routes = [a.text.strip() for a in soup.select('a[href^="stoplist?a=ttc&r="]')]

    # Prediction paragraphs have ids shaped like "<route>_<stopcall>_<slot>".
    pattern = re.compile(r'^\d{2,3}_\d{3,5}_[1-4]$')
    vehicles = []
    stopcall = None  # common stop-call id, taken from the first match
    for p_tag in soup.find_all('p', id=pattern):
        id_parts = p_tag.get('id').split('_')
        route = id_parts[0]
        if stopcall is None:
            stopcall = id_parts[1]
        vehicle_data = {
            'id': route,
        }

        # --- arrival time ---
        minpred_span = p_tag.find('span', class_='minpred')
        if minpred_span:
            secs_span = minpred_span.find('span', id=re.compile(r'SECS\d+'))
            # BUG FIX: the original assumed this span "will always exist"
            # and crashed with AttributeError when it did not.
            if secs_span is not None:
                secs_value = secs_span.text.strip()
                # An inner <span>h</span> marks an "<hours>h<minutes>" layout.
                if secs_span.find('span', text='h'):
                    # Text node before the <span> holds the hours.
                    hours_text = secs_span.contents[0].strip()
                    hours_value = int(hours_text) if hours_text.isdigit() else 0
                    # Text node after the <span> holds the minutes.
                    minutes_text = secs_span.contents[2].strip()
                    minutes_value = int(minutes_text) if minutes_text.isdigit() else 0
                else:
                    hours_value = 0
                    minutes_value = int(secs_value) if secs_value.isdigit() else 0
                # "0" or a "!"-prefixed value means the vehicle is arriving.
                if secs_value == "0" or secs_value.startswith("!"):
                    vehicle_data['minutes'] = "Now"
                else:
                    total_minutes = minutes_value + (hours_value * 60)
                    vehicle_data['minutes'] = f"{total_minutes} minutes"

        # --- delay text (span styled with light-dark()) ---
        delay_span = None
        for span in p_tag.find_all('span', style=True):
            if 'color: light-dark' in span['style']:
                delay_span = span
                break
        vehicle_data['delay_text'] = delay_span.get_text(strip=True) if delay_span else "0"

        # --- vehicle number ---
        vehicle_link = p_tag.find('a', href="#MapMain")
        if vehicle_link and vehicle_link.text.strip().isdigit():
            vehicle_data['vehicle_number'] = vehicle_link.text.strip()
        else:
            # Case 2: Vehicle number is in plain text like "Vehicle 3417 Load"
            match = re.search(r'Vehicle\s+(\d+)', p_tag.get_text())
            if match:
                vehicle_data['vehicle_number'] = match.group(1)

        vehicles.append(vehicle_data)

    return jsonify({
        'stop': stop_id,
        'routes': routes,
        'vehicles': vehicles,
        'stopcall': stopcall
    })
# NOTE(review): this reads request JSON like a Flask view, but no
# @app.route decorator is visible here — confirm it was not lost in transit.
def vehicle_status():
    """Given JSON {'vehicle_numbers': [...]}, return live location and
    occupancy data for those vehicles as {'vehicles': [...]}.
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    vehicle_numbers = data.get('vehicle_numbers')
    if not vehicle_numbers or not isinstance(vehicle_numbers, list):
        return jsonify({'error': 'vehicle_numbers must be a list'}), 400
    info = get_vehicle_locations_and_occupancy(vehicle_numbers)
    # BUG FIX: on upstream failure the helper returns a (response, 500)
    # tuple; the original wrapped that tuple in jsonify, producing a
    # malformed success payload instead of propagating the error.
    if isinstance(info, tuple):
        return info
    return jsonify({'vehicles': info})
def destinations():
    """Given JSON {'route': ..., 'stopCall': ...}, return destination
    information for the vehicles serving that stop.
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    payload = request.get_json()
    stop_call = payload.get('stopCall')
    route_name = payload.get('route')
    if not stop_call:
        return jsonify({'error': 'stopCall is required'}), 400
    if not route_name:
        return jsonify({'error': 'route is required'}), 400
    return get_vehicle_destinations(route_name, stop_call)
# NOTE(review): this reads request JSON like a Flask view, but no
# @app.route decorator is visible here — confirm it was not lost in transit.
def vehicleinfo():
    """Given JSON {'vehicle_number': ...}, return the vehicle's live
    location/occupancy and its current delay text.
    """
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400
    data = request.get_json()
    vehicle_number = data.get('vehicle_number')
    if not vehicle_number:
        return jsonify({'error': 'vehicle_number is required'}), 400

    location_rows = get_vehicle_locations_and_occupancy([vehicle_number])
    # BUG FIX: propagate the helper's (response, 500) error tuple instead of
    # indexing into it.
    if isinstance(location_rows, tuple):
        return location_rows
    # BUG FIX: the original indexed [0] unconditionally and raised
    # IndexError when the vehicle was not in the live feed.
    if not location_rows:
        return jsonify({'error': 'Vehicle not found in live feed'}), 404
    location = location_rows[0]

    delay = get_vehicle_delay(vehicle_number)
    # Same propagation for the delay helper's error tuple.
    if isinstance(delay, tuple):
        return delay
    return jsonify({
        'location': location,
        'delay': delay
    })
| #@app.route('/beta_testing') | |
| #def beta_testing(): | |
| # location = get_vehicle_locations_and_occupancy(['3166'])[0] | |
| # delay = get_vehicle_delay('3166') | |
| # return jsonify({ | |
| # 'location': location, | |
| # 'delay': delay | |
| # }) | |
if __name__ == '__main__':
    # Development entry point only — debug=True enables the interactive
    # debugger and must not be used behind a production server.
    app.run(debug=True, port=5000)