Gini-Eddie committed on
Commit
dbd9716
·
verified ·
1 Parent(s): 1d41ab0

Upload matching_engine.py

Browse files
Files changed (1) hide show
  1. matching_engine.py +232 -0
matching_engine.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ UzoAgro AI - Spatio-Temporal Logistics Matching Engine
3
+
4
+ Calculates intelligent driver-to-cargo matches using a 5-dimensional scoring model:
5
+ 1. Capacity Fit (Binary Mask): Verifies strict tonnage requirements.
6
+ 2. Temporal Fit: Scores schedule alignment (0-2 days tolerance).
7
+ 3. Deadhead Score: Proximity for empty pickup transit.
8
+ 4. Corridor Score (Vector Deviation): Cross-track distance from backhaul trajectory.
9
+ 5. Cargo Affinity Matrix: Intelligently matches exact crops, broad categories, and safe pivots.
10
+
11
+ Outputs `data/matches.csv` containing the top matched drivers based on a weighted composite score.
12
+ """
13
+
14
+ import os
15
+ import math
16
+ from datetime import datetime, timedelta
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ # ----------------- Cargo Matrix Definitions -----------------
21
CROP_CATEGORIES = {
    "Grains": ["Rice", "Maize", "Beans", "Millet", "Sorghum"],
    "Tubers": ["Yam", "Cassava", "Potatoes"],
    "Perishables": ["Tomatoes", "Onions", "Peppers"]
}

def get_category(crop):
    """Return the broad category name for a crop, or "Unknown" if unlisted."""
    return next(
        (name for name, members in CROP_CATEGORIES.items() if crop in members),
        "Unknown",
    )
33
+
34
+ # ----------------- I/O & Parsing -----------------
35
def load_data(drivers_path="data/drivers.csv", requests_path="data/requests.csv"):
    """Read the driver and request tables from CSV.

    Date columns (when present) are parsed with errors coerced to NaT and
    truncated to midnight so day-level comparisons are exact.
    """
    drivers = pd.read_csv(drivers_path)
    requests = pd.read_csv(requests_path)

    # Normalize each table's schedule column to a date-only timestamp.
    for frame, col in ((drivers, "available_date"), (requests, "requested_date")):
        if col in frame.columns:
            frame[col] = pd.to_datetime(frame[col], errors="coerce").dt.normalize()

    return drivers, requests
46
+
47
+ # ----------------- Core Mathematical Operations -----------------
48
def compute_distance(lat1, lon1, lat2, lon2):
    """Planar Euclidean distance between two points in raw degree units.

    NOTE(review): not haversine — presumably intentional, since callers only
    use it for relative proximity ranking, never for kilometre values.
    Works element-wise on NumPy arrays as well as scalars.
    """
    d_lat = lat1 - lat2
    d_lon = lon1 - lon2
    return np.sqrt(d_lat * d_lat + d_lon * d_lon)
50
+
51
def compute_cross_track_deviation(A_lat, A_lon, B_lat, B_lon, P_lat, P_lon):
    """Perpendicular (cross-track) distance of point P from the line through A and B.

    Uses the cross-product / segment-length formula in planar degree space.
    Degenerate segments (A == B) fall back to the plain A->P distance.
    Works element-wise on NumPy arrays as well as scalars.
    """
    seg_lat, seg_lon = B_lat - A_lat, B_lon - A_lon
    rel_lat, rel_lon = P_lat - A_lat, P_lon - A_lon

    seg_len = np.sqrt(seg_lat ** 2 + seg_lon ** 2)

    # Division by a zero-length segment is masked and patched afterwards.
    with np.errstate(divide='ignore', invalid='ignore'):
        area = np.abs(seg_lon * rel_lat - seg_lat * rel_lon)
        offset = np.divide(area, seg_len)
        # sqrt(rel_lat² + rel_lon²) is exactly the A->P Euclidean distance.
        offset = np.where(seg_len == 0, np.sqrt(rel_lat ** 2 + rel_lon ** 2), offset)

    return offset
65
+
66
+ # ----------------- Feature Engineering -----------------
67
def extract_temporal_score(driver_dates, request_date):
    """Score schedule alignment: 1.0 same day, 0.8 one day off, 0.4 two days off, else 0.

    A missing request date means there is no temporal constraint, so every
    driver receives a full score of 1.0. Drivers with NaT dates score 0.0
    (NaN day gaps fall through to np.select's default).
    """
    if pd.isna(request_date):
        return np.ones(len(driver_dates))

    gap = np.abs((driver_dates - request_date).dt.days).to_numpy(dtype=float)
    return np.select([gap == 0, gap == 1, gap == 2], [1.0, 0.8, 0.4], default=0.0)
76
+
77
def extract_affinity_score(driver_crops_series, request_crop):
    """
    Evaluates cargo compatibility.
    - Exact crop match = 1.0
    - Same category match = 0.8
    - Safe pivot (Grains <-> Tubers) = 0.4
    - Unsafe pivot (Perishables to Dry) = 0.0
    """
    request_category = get_category(request_crop)
    # Grains and Tubers are cross-compatible dry/hardy goods.
    safe_pivots = [{"Grains", "Tubers"}]

    def score_one(allowed_str):
        # Missing crop list means the driver can carry nothing for us.
        if pd.isna(allowed_str):
            return 0.0

        handled = allowed_str.split("|")

        # 1. Exact crop match.
        if request_crop in handled:
            return 1.0

        handled_categories = {get_category(c) for c in handled}

        # 2. Same broad category.
        if request_category in handled_categories:
            return 0.8

        # 3. Safe pivot between cross-compatible category pairs.
        for pair in safe_pivots:
            if request_category in pair and handled_categories & pair:
                return 0.4

        # 4. Unsafe pivot (e.g. Perishables onto a dry-goods truck).
        return 0.0

    return np.array([score_one(entry) for entry in driver_crops_series], dtype=float)
124
+
125
def compute_scores(drivers_df, request_row):
    """Build the per-driver feature table for a single cargo request.

    Returns a DataFrame with one row per driver holding the five component
    scores (capacity, time, affinity, deadhead, corridor), each in [0, 1].
    Distance-based scores are min-max normalized across this driver pool,
    so they are relative to the current request only.
    """
    # Request geometry and constraints.
    # NOTE(review): Series.get returns None for a missing key, so float(None)
    # would raise TypeError — assumes pickup/dropoff coordinates are always
    # present in requests.csv; confirm against the data contract.
    p_lat, p_lon = float(request_row.get("pickup_lat")), float(request_row.get("pickup_lon"))
    d_lat, d_lon = float(request_row.get("dropoff_lat")), float(request_row.get("dropoff_lon"))
    req_cap = float(request_row.get("required_capacity", 0))
    req_date = request_row.get("requested_date")
    req_crop = request_row.get("crop_type", "")

    # Driver positions; home base falls back to the current position when the
    # home_base_* columns are absent (a parked driver has a zero-length route).
    curr_lats = drivers_df["current_lat"].to_numpy(dtype=float)
    curr_lons = drivers_df["current_lon"].to_numpy(dtype=float)
    home_lats = drivers_df.get("home_base_lat", drivers_df["current_lat"]).to_numpy(dtype=float)
    home_lons = drivers_df.get("home_base_lon", drivers_df["current_lon"]).to_numpy(dtype=float)

    # Missing capacity column is treated as zero capacity for every driver.
    avail_caps = drivers_df.get("available_capacity", pd.Series(np.zeros(len(drivers_df)))).to_numpy(dtype=float)
    driver_dates = drivers_df.get("available_date")
    driver_crops = drivers_df.get("allowed_crops")

    # 1. Capacity: binary mask — the driver must fit the full requested tonnage.
    capacity_score = (avail_caps >= req_cap).astype(float)

    # 2. Temporal: 1.0 / 0.8 / 0.4 for 0 / 1 / 2 days of schedule slack.
    time_score = extract_temporal_score(driver_dates, req_date)

    # 3. Cargo affinity: exact crop / same category / safe pivot.
    affinity_score = extract_affinity_score(driver_crops, req_crop)

    # 4. Deadhead: empty-run distance from the driver's current spot to pickup.
    deadhead_dists = compute_distance(curr_lats, curr_lons, p_lat, p_lon)

    # 5. Corridor: summed cross-track deviation of pickup and dropoff from the
    # driver's current->home trajectory (how far the job pulls them off route).
    pickup_dev = compute_cross_track_deviation(curr_lats, curr_lons, home_lats, home_lons, p_lat, p_lon)
    dropoff_dev = compute_cross_track_deviation(curr_lats, curr_lons, home_lats, home_lons, d_lat, d_lon)
    total_dev = pickup_dev + dropoff_dev

    def inv_normalize(arr):
        # Min-max normalize, then invert so smaller distances score nearer 1.0.
        # A (near-)constant array would divide by ~0, so everyone scores 1.0.
        minv, maxv = arr.min(), arr.max()
        if maxv - minv <= 1e-12:
            return np.ones_like(arr, dtype=float)
        return 1.0 - ((arr - minv) / (maxv - minv))

    features_df = pd.DataFrame({
        "driver_id": drivers_df["driver_id"].values,
        "driver_name": drivers_df["name"].values,
        "capacity_score": capacity_score,
        "time_score": time_score,
        "affinity_score": affinity_score,
        "deadhead_score": inv_normalize(deadhead_dists),
        "corridor_score": inv_normalize(total_dev)
    })

    return features_df
175
+
176
+ # ----------------- Execution & Ranking -----------------
177
def run_matching_engine(drivers_df, requests_df, top_k=3):
    """For each request, rank every driver and keep the top_k viable matches.

    The composite score is a weighted blend of the deadhead, corridor,
    affinity, and time scores, multiplied by the binary capacity mask so
    under-capacity drivers collapse to zero. Only rows with a strictly
    positive final_score are kept. Returns the matches as a DataFrame.
    """
    # Algorithm weights (sum to 1.0); capacity acts as a hard on/off gate.
    weights = {
        "deadhead_score": 0.25,
        "corridor_score": 0.35,
        "affinity_score": 0.20,
        "time_score": 0.20,
    }

    rows = []
    for _, request in requests_df.iterrows():
        scored = compute_scores(drivers_df, request)

        blended = sum(w * scored[col] for col, w in weights.items())
        scored["final_score"] = blended * scored["capacity_score"]

        ranked = scored.sort_values(by="final_score", ascending=False).head(top_k)

        for _, candidate in ranked.iterrows():
            # Only keep viable pairings (score > 0).
            if float(candidate["final_score"]) <= 0:
                continue
            rows.append({
                "request_id": request["request_id"],
                "driver_id": candidate["driver_id"],
                "driver_name": str(candidate["driver_name"]),
                "final_score": float(candidate["final_score"]),
                "capacity_score": float(candidate["capacity_score"]),
                "time_score": float(candidate["time_score"]),
                "affinity_score": float(candidate["affinity_score"]),
                "deadhead_score": float(candidate["deadhead_score"]),
                "corridor_score": float(candidate["corridor_score"])
            })

    return pd.DataFrame(rows)
215
+
216
+ # ----------------- Entry Point -----------------
217
def main():
    """Load the input CSVs, run the matcher, and persist data/matches.csv."""
    data_dir = "data"
    drivers_path = os.path.join(data_dir, "drivers.csv")
    requests_path = os.path.join(data_dir, "requests.csv")

    inputs_present = os.path.exists(drivers_path) and os.path.exists(requests_path)
    if not inputs_present:
        raise FileNotFoundError("Missing datasets. Ensure data/drivers.csv and data/requests.csv exist.")

    drivers_df, requests_df = load_data(drivers_path, requests_path)
    matches_df = run_matching_engine(drivers_df, requests_df, top_k=3)

    out_path = os.path.join(data_dir, "matches.csv")
    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
    matches_df.to_csv(out_path, index=False, encoding="utf-8-sig")
    print(f"Intelligence Engine Complete. Saved {len(matches_df)} matches to {out_path}.")

if __name__ == "__main__":
    main()