42Cummer committed on
Commit
0083f50
·
verified ·
1 Parent(s): 1759e3e

extract individual vehicle info as well

Browse files
Files changed (1) hide show
  1. app.py +74 -17
app.py CHANGED
@@ -72,22 +72,6 @@ def getRouteList():
72
 
73
  routes[rid] = text
74
 
75
- for rid in range(500, 599): # all streetcar routes
76
- tag = soup.find("p", {"id": str(rid)})
77
- if tag and tag.text.strip():
78
- # Clean text:
79
- text = tag.get_text(strip=True)
80
-
81
- # Remove leading symbols like "*" or "== $0"
82
- text = re.sub(r"^[^A-Za-z0-9]+", "", text) # strip non-alphanumeric at start
83
- text = re.sub(r"\s*==.*$", "", text) # strip trailing "== $0" or similar
84
- text = re.sub(r"\s+", " ", text).strip() # collapse spaces
85
-
86
- # Replace dash between number and name with space (e.g., "26-Dupont" -> "26 Dupont")
87
- text = re.sub(r'(\d+)-([A-Za-z])', r'\1 \2', text)
88
-
89
- routes[rid] = text
90
-
91
  for rid in range(900, 999): # all express routes
92
  tag = soup.find("p", {"id": str(rid)})
93
  if tag and tag.text.strip():
@@ -174,7 +158,6 @@ def listVehiclesByRoute():
174
  pass
175
  # If it's mm:ss format, keep as is
176
 
177
- if delay is None: continue
178
  delay = ("-" if late else "+") + delay_value
179
 
180
  # Extract coordinates from JavaScript AddMarker calls
@@ -248,6 +231,76 @@ def serviceAlerts():
248
 
249
  return jsonify(alerts)
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  @app.route('/seek', methods=['POST'])
252
  def seek():
253
  if not request.is_json:
@@ -430,6 +483,10 @@ def seek():
430
  else:
431
  vehicle_data['vehicle_number'] = None
432
 
 
 
 
 
433
  vehicles.append(vehicle_data)
434
 
435
  return jsonify({
 
72
 
73
  routes[rid] = text
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  for rid in range(900, 999): # all express routes
76
  tag = soup.find("p", {"id": str(rid)})
77
  if tag and tag.text.strip():
 
158
  pass
159
  # If it's mm:ss format, keep as is
160
 
 
161
  delay = ("-" if late else "+") + delay_value
162
 
163
  # Extract coordinates from JavaScript AddMarker calls
 
231
 
232
  return jsonify(alerts)
233
 
234
+
235
def getVehicleInfo(vehicle_id):
    """Fetch direction and destination for one vehicle from TransSee fleetfind.

    Requests the fleetfind page (agency ``ttc``) for ``vehicle_id``, locates
    the ``<p>`` element whose ``id`` equals the vehicle id, and extracts two
    fields from the markup that precedes the first ``<br>`` tag.

    Args:
        vehicle_id: Vehicle identifier used both as the ``q`` query parameter
            and as the ``id`` of the paragraph to look up.

    Returns:
        A plain ``dict`` that always has the keys ``"direction"`` and
        ``"destination"`` (each a ``str`` or ``None``). When the page cannot
        be fetched or the vehicle paragraph is missing, both values are
        ``None`` and an ``"error"`` key holds a description.

        A dict (not a Flask response) is returned because the caller in
        ``seek`` indexes the result with ``['direction']`` and
        ``['destination']``; a ``jsonify`` response is not subscriptable.
    """
    result = {"direction": None, "destination": None}

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'Referer': 'https://www.transsee.ca/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'DNT': '1',  # Do Not Track
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        # Pass the query as ``params`` so vehicle_id is URL-encoded instead of
        # being spliced raw into the query string.
        response = requests.get(
            'https://www.transsee.ca/fleetfind',
            params={'a': 'ttc', 'findtrack': '1', 'q': vehicle_id, 'Go': 'Go'},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        result["error"] = f"Failed to fetch vehicle info: {e}"
        return result

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the specific paragraph with id=vehicle_id
    p = soup.find("p", id=str(vehicle_id))
    if not p:
        result["error"] = f"Vehicle {vehicle_id} not found"
        return result

    # Work on the serialized HTML so the <br> boundary and quoted text survive
    html_content = str(p)

    # Extract direction (like "going D")
    direction_match = re.search(r'going\s+([A-Za-z0-9]+)', html_content)
    if direction_match:
        result["direction"] = direction_match.group(1)

    # Only the part before the first <br> is searched for the destination
    before_br = re.split(r'<br\s*/?>', html_content, maxsplit=1)[0]

    # Preferred: a double-quoted string that contains a compass word
    dest_match = re.search(r'"([^"]*(?:South|North|East|West)[^"]*)"', before_br)
    if dest_match:
        raw_dest = dest_match.group(1)
    else:
        # Fallback: strip the tags and look for "<Compass> to ..." in the text
        text_content = BeautifulSoup(before_br, 'html.parser').get_text()
        dest_match = re.search(
            r'((?:South|North|East|West) to [^=]*?)(?=\s*==|\s*$)', text_content
        )
        raw_dest = dest_match.group(1) if dest_match else None

    if raw_dest is not None:
        # Drop trailing location details introduced by the word "at".
        # The \b keeps names that merely end in "at" (e.g. "Flat Rock") intact.
        result["destination"] = re.sub(r'\bat\s+.*$', '', raw_dest.strip()).strip()

    return result
302
+
303
+
304
  @app.route('/seek', methods=['POST'])
305
  def seek():
306
  if not request.is_json:
 
483
  else:
484
  vehicle_data['vehicle_number'] = None
485
 
486
+ if vehicle_data['vehicle_number'] is None: continue
487
+ vehicle_info = getVehicleInfo(vehicle_data['vehicle_number'])
488
+ vehicle_data['direction'] = vehicle_info['direction']
489
+ vehicle_data['destination'] = vehicle_info['destination']
490
  vehicles.append(vehicle_data)
491
 
492
  return jsonify({