ayayaya12 committed on
Commit
795f8d5
·
1 Parent(s): 04dc4d1

Update Recommend API

Browse files
Files changed (2) hide show
  1. src/prompts.py +2 -2
  2. src/recommendation_api.py +296 -56
src/prompts.py CHANGED
@@ -110,8 +110,8 @@ Câu trả lời của bạn:
110
  response_gen_prompt = ChatPromptTemplate.from_messages(
111
  [
112
  ("system", response_gen_template_string),
113
- MessagesPlaceholder(variable_name="chat_history_messages"), # Nơi chèn lịch sử chat
114
- ("human", "Thông tin tìm kiếm được (nếu có liên quan đến câu hỏi cuối cùng):\n{search_results}\n\nCâu hỏi cuối cùng của người dùng: {user_query}"), # Đặt câu hỏi và kết quả cuối cùng dạng human
115
  ]
116
  )
117
 
 
110
  response_gen_prompt = ChatPromptTemplate.from_messages(
111
  [
112
  ("system", response_gen_template_string),
113
+ MessagesPlaceholder(variable_name="chat_history_messages"),
114
+ ("human", "Thông tin tìm kiếm được (nếu có liên quan đến câu hỏi cuối cùng):\n{search_results}\n\nCâu hỏi cuối cùng của người dùng: {user_query}"),
115
  ]
116
  )
117
 
src/recommendation_api.py CHANGED
@@ -7,6 +7,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.metrics.pairwise import cosine_similarity
8
  import numpy as np
9
  from pydantic import BaseModel, Field
 
 
10
 
11
  class TourRecommendationRequest(BaseModel):
12
  user_id: Optional[int] = Field(None)
@@ -25,37 +27,121 @@ class TourSummary(BaseModel):
25
 
26
  class TourRecommendationResponse(BaseModel):
27
  recommendations: List[TourSummary]
28
- recommendation_type: str = "content-based"
29
 
30
  class ContentBasedRecommender:
31
  def __init__(self, conn):
32
  self.conn = conn
 
 
 
 
 
 
 
 
33
  self.vectorizer = TfidfVectorizer(
34
- max_features=5000,
35
- stop_words='english',
36
- ngram_range=(1, 2)
 
 
 
 
37
  )
 
38
  self.field_weights = {
39
- 'title': 0.25,
40
- 'description': 0.20,
41
- 'destination': 0.25,
42
- 'departure_location': 0.15,
43
- 'region': 0.10,
44
- 'itinerary': 0.05
 
 
45
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def preprocess_text(self, text):
48
  if not text:
49
  return ""
 
 
 
50
  text = str(text).lower()
51
- text = re.sub(r'[^\w\s]', ' ', text)
 
 
52
  text = re.sub(r'\s+', ' ', text).strip()
53
- return text
 
 
 
 
54
 
55
  def preprocess_list(self, items):
56
  if not items:
57
  return ""
58
- return " ".join([self.preprocess_text(item) for item in items])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def preprocess_json(self, json_data):
61
  if not json_data:
@@ -65,19 +151,31 @@ class ContentBasedRecommender:
65
  data = json.loads(json_data)
66
  else:
67
  data = json_data
 
68
  text_values = []
 
69
  def extract_values(obj):
70
  if isinstance(obj, dict):
71
- for val in obj.values():
72
- extract_values(val)
 
 
 
 
 
 
73
  elif isinstance(obj, list):
74
  for item in obj:
75
  extract_values(item)
76
- elif obj:
77
- text_values.append(str(obj))
 
 
 
78
  extract_values(data)
79
  return " ".join(text_values)
80
- except:
 
81
  return ""
82
 
83
  def get_all_tours(self):
@@ -91,11 +189,20 @@ class ContentBasedRecommender:
91
  t.description,
92
  t.destination,
93
  t.region,
94
- t.itinerary
 
 
 
 
95
  FROM
96
  Tour t
 
 
97
  WHERE
98
  t.availability = true
 
 
 
99
  """)
100
  return cursor.fetchall()
101
 
@@ -103,15 +210,19 @@ class ContentBasedRecommender:
103
  with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
104
  cursor.execute("""
105
  SELECT
106
- h.tour_id
 
 
107
  FROM
108
  History h
109
  WHERE
110
  h.user_id = %s
111
  GROUP BY
112
  h.tour_id
 
 
113
  """, (user_id,))
114
- return [row['tour_id'] for row in cursor.fetchall()]
115
 
116
  def get_tour_by_id(self, tour_id):
117
  with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
@@ -124,63 +235,153 @@ class ContentBasedRecommender:
124
  t.description,
125
  t.destination,
126
  t.region,
127
- t.itinerary
 
 
 
 
128
  FROM
129
  Tour t
 
 
130
  WHERE
131
  t.tour_id = %s
 
 
 
132
  """, (tour_id,))
133
  return cursor.fetchone()
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def create_tour_features(self, tours):
136
  tour_features = {}
 
137
  for tour in tours:
138
- title = self.preprocess_text(tour['title'])
139
- description = self.preprocess_text(tour['description'])
140
- departure_location = self.preprocess_text(tour['departure_location'])
141
- destination = self.preprocess_list(tour['destination'])
142
- region = self.preprocess_text(str(tour['region']))
143
- itinerary = self.preprocess_json(tour['itinerary'])
 
 
 
 
144
  combined_features = (
145
- f"{title} " * int(self.field_weights['title'] * 10) +
146
- f"{description} " * int(self.field_weights['description'] * 10) +
147
- f"{destination} " * int(self.field_weights['destination'] * 10) +
148
- f"{departure_location} " * int(self.field_weights['departure_location'] * 10) +
149
- f"{region} " * int(self.field_weights['region'] * 10) +
150
- f"{itinerary} " * int(self.field_weights['itinerary'] * 10)
 
 
151
  )
152
- tour_features[tour['tour_id']] = combined_features
 
 
153
  return tour_features
154
 
155
- def calculate_similarity(self, tour_features):
 
 
156
  tour_ids = list(tour_features.keys())
157
  feature_texts = [tour_features[tour_id] for tour_id in tour_ids]
158
- tfidf_matrix = self.vectorizer.fit_transform(feature_texts)
159
- cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
 
 
 
 
 
 
 
 
 
 
 
160
  similarity_dict = {}
 
161
  for i, tour_id in enumerate(tour_ids):
162
- similarity_dict[tour_id] = {
163
- tour_ids[j]: cosine_sim[i][j] for j in range(len(tour_ids)) if i != j
164
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  return similarity_dict
166
 
167
  def recommend_similar_tours(self, tour_id, limit=3):
168
  all_tours = self.get_all_tours()
169
  target_tour = None
 
170
  for tour in all_tours:
171
  if tour.get('tour_id') == tour_id:
172
  target_tour = tour
173
  break
 
174
  if not target_tour:
175
  return []
176
- tour_features = self.create_tour_features(all_tours)
177
- similarity_dict = self.calculate_similarity(tour_features)
 
178
  if tour_id in similarity_dict:
179
  similar_tours = sorted(
180
  similarity_dict[tour_id].items(),
181
  key=lambda x: x[1],
182
  reverse=True
183
  )[:limit]
 
184
  recommended_tours = []
185
  for similar_tour_id, similarity_score in similar_tours:
186
  for tour in all_tours:
@@ -189,34 +390,65 @@ class ContentBasedRecommender:
189
  tour_copy['similarity_score'] = float(similarity_score)
190
  recommended_tours.append(tour_copy)
191
  break
 
192
  return recommended_tours
 
193
  return []
194
 
195
  def recommend_for_user(self, user_id, limit=3):
196
  user_history = self.get_user_history(user_id)
 
197
  if not user_history:
198
  return self.recommend_popular_tours(limit)
 
199
  all_tours = self.get_all_tours()
200
- tour_features = self.create_tour_features(all_tours)
201
- similarity_dict = self.calculate_similarity(tour_features)
202
  tour_scores = {}
 
 
203
  for tour in all_tours:
204
  tour_id = tour.get('tour_id')
205
- if tour_id is None or tour_id in user_history:
206
  continue
 
207
  total_similarity = 0
208
- count = 0
209
- for history_tour_id in user_history:
210
- if history_tour_id in similarity_dict and tour_id in similarity_dict[history_tour_id]:
211
- total_similarity += similarity_dict[history_tour_id][tour_id]
212
- count += 1
213
- if count > 0:
214
- tour_scores[tour_id] = total_similarity / count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  top_tours = sorted(
216
  tour_scores.items(),
217
  key=lambda x: x[1],
218
  reverse=True
219
  )[:limit]
 
220
  recommended_tours = []
221
  for tour_id, similarity_score in top_tours:
222
  for tour in all_tours:
@@ -225,6 +457,7 @@ class ContentBasedRecommender:
225
  tour_copy['similarity_score'] = float(similarity_score)
226
  recommended_tours.append(tour_copy)
227
  break
 
228
  return recommended_tours
229
 
230
  def recommend_popular_tours(self, limit=3):
@@ -238,21 +471,28 @@ class ContentBasedRecommender:
238
  t.description,
239
  t.destination,
240
  t.region,
241
- COUNT(b.booking_id) as booking_count
 
 
242
  FROM
243
  Tour t
244
  LEFT JOIN
245
  Departure d ON t.tour_id = d.tour_id
246
  LEFT JOIN
247
  Booking b ON d.departure_id = b.departure_id
 
 
248
  WHERE
249
  t.availability = true
250
  GROUP BY
251
- t.tour_id
 
252
  ORDER BY
253
- booking_count DESC
 
254
  LIMIT %s
255
  """, (limit,))
 
256
  popular_tours = cursor.fetchall()
257
  for tour in popular_tours:
258
  tour['similarity_score'] = None
 
7
  from sklearn.metrics.pairwise import cosine_similarity
8
  import numpy as np
9
  from pydantic import BaseModel, Field
10
+ from bs4 import BeautifulSoup
11
+ import math
12
 
13
  class TourRecommendationRequest(BaseModel):
14
  user_id: Optional[int] = Field(None)
 
27
 
28
  class TourRecommendationResponse(BaseModel):
29
  recommendations: List[TourSummary]
 
30
 
31
  class ContentBasedRecommender:
32
  def __init__(self, conn):
33
  self.conn = conn
34
+ vietnamese_stop_words = [
35
+ "và", "là", "của", "trong", "được", "có", "không", "cho", "với",
36
+ "tại", "bằng", "để", "này", "khi", "một", "những", "các", "đã",
37
+ "rồi", "lại", "nếu", "vì", "thì", "từ", "ra", "đến", "trên", "dưới",
38
+ "quý", "khách", "tham", "quan", "du", "lịch", "tour", "ngày", "đêm",
39
+ "ăn", "sáng", "trưa", "tối", "nghỉ", "khách", "sạn", "tự", "túc"
40
+ ]
41
+
42
  self.vectorizer = TfidfVectorizer(
43
+ max_features=8000,
44
+ stop_words=vietnamese_stop_words,
45
+ ngram_range=(1, 3),
46
+ min_df=1,
47
+ max_df=0.8,
48
+ token_pattern=r'[a-zA-ZÀ-ỹ]+',
49
+ lowercase=True
50
  )
51
+
52
  self.field_weights = {
53
+ 'title': 0.20,
54
+ 'destination': 0.30,
55
+ 'description': 0.15,
56
+ 'departure_location': 0.10,
57
+ 'region': 0.15,
58
+ 'itinerary': 0.10,
59
+ 'duration': 0.05,
60
+ 'attractions': 0.15
61
  }
62
+
63
+ self.region_proximity = {
64
+ 1: {1: 1.0, 2: 0.6, 3: 0.3},
65
+ 2: {1: 0.6, 2: 1.0, 3: 0.7},
66
+ 3: {1: 0.3, 2: 0.7, 3: 1.0}
67
+ }
68
+
69
def clean_html(self, text):
    """Strip HTML markup from ``text`` and collapse runs of whitespace.

    Args:
        text: Raw value that may contain HTML. Falsy values yield "".

    Returns:
        The text content with every whitespace run replaced by a single
        space and the ends trimmed. If parsing fails, ``str(text)`` is
        returned as-is (best-effort fallback).
    """
    if not text:
        return ""
    try:
        soup = BeautifulSoup(text, 'html.parser')
        return re.sub(r'\s+', ' ', soup.get_text()).strip()
    except Exception:
        # Best effort: fall back to the raw value. Catch Exception rather
        # than using a bare except so KeyboardInterrupt/SystemExit still
        # propagate.
        return str(text)
79
 
80
  def preprocess_text(self, text):
81
  if not text:
82
  return ""
83
+
84
+ text = self.clean_html(text)
85
+
86
  text = str(text).lower()
87
+
88
+ text = re.sub(r'[^\w\sÀ-ỹ]', ' ', text)
89
+
90
  text = re.sub(r'\s+', ' ', text).strip()
91
+
92
+ words = text.split()
93
+ words = [word for word in words if len(word) >= 2]
94
+
95
+ return " ".join(words)
96
 
97
  def preprocess_list(self, items):
98
  if not items:
99
  return ""
100
+ processed_items = []
101
+ for item in items:
102
+ cleaned = self.preprocess_text(item)
103
+ if cleaned:
104
+ processed_items.append(cleaned)
105
+ return " ".join(processed_items)
106
+
107
def extract_attractions_from_itinerary(self, itinerary):
    """Collect highlighted phrases from the itinerary's day descriptions.

    For each dict entry of a list-shaped itinerary, the HTML in its
    ``description`` is scanned for ``<strong>`` tags and for ``<span>``
    tags whose ``style`` attribute contains "color". Their text is cleaned
    with ``preprocess_text``; results of three characters or fewer are
    dropped.

    Args:
        itinerary: A JSON string or already-parsed data. Falsy values
            yield "".

    Returns:
        The cleaned phrases joined by single spaces. Returns "" when the
        data is not a list or when any error occurs (the error is printed).
    """
    if not itinerary:
        return ""

    try:
        data = json.loads(itinerary) if isinstance(itinerary, str) else itinerary

        attractions = []
        if isinstance(data, list):
            for day in data:
                if not isinstance(day, dict):
                    continue
                description = day.get('description', '')
                if not description:
                    continue

                # Parse once per day; both tag queries reuse the tree.
                soup = BeautifulSoup(description, 'html.parser')
                attractions.extend(tag.get_text() for tag in soup.find_all('strong'))

                colored_spans = soup.find_all('span', style=lambda x: x and 'color' in x)
                attractions.extend(span.get_text() for span in colored_spans)

        clean_attractions = []
        for attraction in attractions:
            cleaned = self.preprocess_text(attraction)
            if cleaned and len(cleaned) > 3:
                clean_attractions.append(cleaned)

        return " ".join(clean_attractions)

    except Exception as e:
        # Best effort: a malformed itinerary must not break feature building.
        print(f"Error extracting attractions: {e}")
        return ""
145
 
146
  def preprocess_json(self, json_data):
147
  if not json_data:
 
151
  data = json.loads(json_data)
152
  else:
153
  data = json_data
154
+
155
  text_values = []
156
+
157
  def extract_values(obj):
158
  if isinstance(obj, dict):
159
+ for key, val in obj.items():
160
+ if key.lower() in ['title', 'description', 'name', 'location']:
161
+ if val:
162
+ clean_val = self.clean_html(str(val))
163
+ if clean_val:
164
+ text_values.append(clean_val)
165
+ else:
166
+ extract_values(val)
167
  elif isinstance(obj, list):
168
  for item in obj:
169
  extract_values(item)
170
+ elif obj and len(str(obj)) > 3:
171
+ clean_val = self.clean_html(str(obj))
172
+ if clean_val:
173
+ text_values.append(clean_val)
174
+
175
  extract_values(data)
176
  return " ".join(text_values)
177
+ except Exception as e:
178
+ print(f"Error preprocessing JSON: {e}")
179
  return ""
180
 
181
  def get_all_tours(self):
 
189
  t.description,
190
  t.destination,
191
  t.region,
192
+ t.itinerary,
193
+ t.max_participants,
194
+ MIN(d.price_adult) as min_price,
195
+ MAX(d.price_adult) as max_price,
196
+ AVG(d.price_adult) as avg_price
197
  FROM
198
  Tour t
199
+ LEFT JOIN
200
+ Departure d ON t.tour_id = d.tour_id AND d.availability = true
201
  WHERE
202
  t.availability = true
203
+ GROUP BY
204
+ t.tour_id, t.title, t.duration, t.departure_location,
205
+ t.description, t.destination, t.region, t.itinerary, t.max_participants
206
  """)
207
  return cursor.fetchall()
208
 
 
210
  with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
211
  cursor.execute("""
212
  SELECT
213
+ h.tour_id,
214
+ COUNT(*) as interaction_count,
215
+ MAX(h.timestamp) as last_interaction
216
  FROM
217
  History h
218
  WHERE
219
  h.user_id = %s
220
  GROUP BY
221
  h.tour_id
222
+ ORDER BY
223
+ interaction_count DESC, last_interaction DESC
224
  """, (user_id,))
225
+ return cursor.fetchall()
226
 
227
  def get_tour_by_id(self, tour_id):
228
  with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
 
235
  t.description,
236
  t.destination,
237
  t.region,
238
+ t.itinerary,
239
+ t.max_participants,
240
+ MIN(d.price_adult) as min_price,
241
+ MAX(d.price_adult) as max_price,
242
+ AVG(d.price_adult) as avg_price
243
  FROM
244
  Tour t
245
+ LEFT JOIN
246
+ Departure d ON t.tour_id = d.tour_id AND d.availability = true
247
  WHERE
248
  t.tour_id = %s
249
+ GROUP BY
250
+ t.tour_id, t.title, t.duration, t.departure_location,
251
+ t.description, t.destination, t.region, t.itinerary, t.max_participants
252
  """, (tour_id,))
253
  return cursor.fetchone()
254
 
255
def extract_duration_days(self, duration):
    """Return the first integer found in a duration value.

    Args:
        duration: Duration label or number. Falsy values yield 0.

    Returns:
        The first run of digits as an int, or 0 when there is none.
    """
    if not duration:
        return 0

    # str() lets numeric durations through; the regex functions raise
    # TypeError on non-string input.
    match = re.search(r'\d+', str(duration))
    return int(match.group()) if match else 0
263
+
264
def calculate_price_similarity(self, price1, price2):
    """Score how close two prices are as the ratio smaller / larger.

    Args:
        price1: First price; any value accepted by ``float``.
        price2: Second price; any value accepted by ``float``.

    Returns:
        0.5 when either price is falsy (None, 0, ""), 1.0 when the larger
        converted price equals zero, otherwise smaller / larger.
    """
    # A falsy price is treated as unknown and gets a neutral score.
    if not (price1 and price2):
        return 0.5

    first, second = float(price1), float(price2)
    upper = max(first, second)

    # Still reachable for truthy inputs that convert to zero, e.g. "0".
    if upper == 0:
        return 1.0

    return min(first, second) / upper
279
+
280
  def create_tour_features(self, tours):
281
  tour_features = {}
282
+
283
  for tour in tours:
284
+ title = self.preprocess_text(tour.get('title', ''))
285
+ description = self.preprocess_text(tour.get('description', ''))
286
+ departure_location = self.preprocess_text(tour.get('departure_location', ''))
287
+ destination = self.preprocess_list(tour.get('destination', []))
288
+ region = self.preprocess_text(str(tour.get('region', '')))
289
+ duration = self.preprocess_text(tour.get('duration', ''))
290
+
291
+ itinerary = self.preprocess_json(tour.get('itinerary'))
292
+ attractions = self.extract_attractions_from_itinerary(tour.get('itinerary'))
293
+
294
  combined_features = (
295
+ f"{title} " * int(self.field_weights['title'] * 20) +
296
+ f"{destination} " * int(self.field_weights['destination'] * 20) +
297
+ f"{description} " * int(self.field_weights['description'] * 20) +
298
+ f"{departure_location} " * int(self.field_weights['departure_location'] * 20) +
299
+ f"{region} " * int(self.field_weights['region'] * 20) +
300
+ f"{itinerary} " * int(self.field_weights['itinerary'] * 20) +
301
+ f"{duration} " * int(self.field_weights['duration'] * 20) +
302
+ f"{attractions} " * int(self.field_weights['attractions'] * 20)
303
  )
304
+
305
+ tour_features[tour['tour_id']] = combined_features.strip()
306
+
307
  return tour_features
308
 
309
def calculate_enhanced_similarity(self, tours):
    """Compute pairwise tour similarity from text, region, duration and price.

    The final score for a pair is
    ``0.6 * text + 0.2 * region + 0.1 * duration + 0.1 * price``, where
    text is the TF-IDF cosine similarity of the tours' feature documents.

    Args:
        tours: Iterable of dict-like tour rows, each with a ``tour_id``.

    Returns:
        Dict ``{tour_id: {other_tour_id: score}}`` that excludes self-pairs.
        Returns {} when there is no usable text or when the TF-IDF step
        fails (the error is printed).
    """
    tour_features = self.create_tour_features(tours)

    tour_ids = list(tour_features)
    feature_texts = [tour_features[tour_id] for tour_id in tour_ids]

    if not feature_texts or all(not text.strip() for text in feature_texts):
        return {}

    try:
        tfidf_matrix = self.vectorizer.fit_transform(feature_texts)
        text_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
    except Exception as e:
        print(f"Error in TF-IDF calculation: {e}")
        return {}

    tour_lookup = {tour['tour_id']: tour for tour in tours}

    # Parse each tour's duration once instead of once per pair. The cache
    # is filled lazily, so a tour with no pair is never parsed.
    duration_cache = {}

    def duration_days(tid):
        if tid not in duration_cache:
            duration_cache[tid] = self.extract_duration_days(
                tour_lookup[tid].get('duration')
            )
        return duration_cache[tid]

    similarity_dict = {}

    for i, tour_id in enumerate(tour_ids):
        tour_i = tour_lookup[tour_id]
        row = similarity_dict[tour_id] = {}

        for j, other_tour_id in enumerate(tour_ids):
            if i == j:
                continue

            tour_j = tour_lookup[other_tour_id]

            text_sim = text_similarity[i][j]

            # Region codes missing from the proximity table score 0.3.
            region_i = tour_i.get('region', 1)
            region_j = tour_j.get('region', 1)
            region_sim = self.region_proximity.get(region_i, {}).get(region_j, 0.3)

            days_i = duration_days(tour_id)
            days_j = duration_days(other_tour_id)
            if days_i == days_j:
                duration_sim = 1.0
            elif abs(days_i - days_j) <= 1:
                duration_sim = 0.7
            else:
                duration_sim = 0.3

            price_sim = self.calculate_price_similarity(
                tour_i.get('avg_price'), tour_j.get('avg_price')
            )

            row[other_tour_id] = (
                text_sim * 0.6 +
                region_sim * 0.2 +
                duration_sim * 0.1 +
                price_sim * 0.1
            )

    return similarity_dict
363
 
364
  def recommend_similar_tours(self, tour_id, limit=3):
365
  all_tours = self.get_all_tours()
366
  target_tour = None
367
+
368
  for tour in all_tours:
369
  if tour.get('tour_id') == tour_id:
370
  target_tour = tour
371
  break
372
+
373
  if not target_tour:
374
  return []
375
+
376
+ similarity_dict = self.calculate_enhanced_similarity(all_tours)
377
+
378
  if tour_id in similarity_dict:
379
  similar_tours = sorted(
380
  similarity_dict[tour_id].items(),
381
  key=lambda x: x[1],
382
  reverse=True
383
  )[:limit]
384
+
385
  recommended_tours = []
386
  for similar_tour_id, similarity_score in similar_tours:
387
  for tour in all_tours:
 
390
  tour_copy['similarity_score'] = float(similarity_score)
391
  recommended_tours.append(tour_copy)
392
  break
393
+
394
  return recommended_tours
395
+
396
  return []
397
 
398
  def recommend_for_user(self, user_id, limit=3):
399
  user_history = self.get_user_history(user_id)
400
+
401
  if not user_history:
402
  return self.recommend_popular_tours(limit)
403
+
404
  all_tours = self.get_all_tours()
405
+ similarity_dict = self.calculate_enhanced_similarity(all_tours)
406
+
407
  tour_scores = {}
408
+ total_interactions = sum(h['interaction_count'] for h in user_history)
409
+
410
  for tour in all_tours:
411
  tour_id = tour.get('tour_id')
412
+ if tour_id is None or any(h['tour_id'] == tour_id for h in user_history):
413
  continue
414
+
415
  total_similarity = 0
416
+ total_weight = 0
417
+
418
+ for history_item in user_history:
419
+ history_tour_id = history_item['tour_id']
420
+ interaction_weight = history_item['interaction_count'] / total_interactions
421
+
422
+ if (history_tour_id in similarity_dict and
423
+ tour_id in similarity_dict[history_tour_id]):
424
+
425
+ similarity = similarity_dict[history_tour_id][tour_id]
426
+ total_similarity += similarity * interaction_weight
427
+ total_weight += interaction_weight
428
+
429
+ if total_weight > 0:
430
+ tour_scores[tour_id] = total_similarity / total_weight
431
+
432
+ user_regions = set()
433
+ for history_item in user_history:
434
+ for tour in all_tours:
435
+ if tour['tour_id'] == history_item['tour_id']:
436
+ user_regions.add(tour.get('region'))
437
+ break
438
+
439
+ for tour_id, score in tour_scores.items():
440
+ for tour in all_tours:
441
+ if tour['tour_id'] == tour_id:
442
+ if tour.get('region') not in user_regions:
443
+ tour_scores[tour_id] = score * 1.1
444
+ break
445
+
446
  top_tours = sorted(
447
  tour_scores.items(),
448
  key=lambda x: x[1],
449
  reverse=True
450
  )[:limit]
451
+
452
  recommended_tours = []
453
  for tour_id, similarity_score in top_tours:
454
  for tour in all_tours:
 
457
  tour_copy['similarity_score'] = float(similarity_score)
458
  recommended_tours.append(tour_copy)
459
  break
460
+
461
  return recommended_tours
462
 
463
  def recommend_popular_tours(self, limit=3):
 
471
  t.description,
472
  t.destination,
473
  t.region,
474
+ COUNT(DISTINCT b.booking_id) as booking_count,
475
+ AVG(r.average_rating) as avg_rating,
476
+ COUNT(DISTINCT r.review_id) as review_count
477
  FROM
478
  Tour t
479
  LEFT JOIN
480
  Departure d ON t.tour_id = d.tour_id
481
  LEFT JOIN
482
  Booking b ON d.departure_id = b.departure_id
483
+ LEFT JOIN
484
+ Review r ON t.tour_id = r.tour_id
485
  WHERE
486
  t.availability = true
487
  GROUP BY
488
+ t.tour_id, t.title, t.duration, t.departure_location,
489
+ t.description, t.destination, t.region
490
  ORDER BY
491
+ (COUNT(DISTINCT b.booking_id) * 0.6 +
492
+ COALESCE(AVG(r.average_rating), 3.0) * COUNT(DISTINCT r.review_id) * 0.4) DESC
493
  LIMIT %s
494
  """, (limit,))
495
+
496
  popular_tours = cursor.fetchall()
497
  for tour in popular_tours:
498
  tour['similarity_score'] = None