yigitcanozdemir committed on
Commit
a7dcca0
·
1 Parent(s): af6b243

Refactor recommendation engine and similarity calculator: remove unnecessary print statements and update status messages for clarity.

Browse files
components/similarity.py CHANGED
@@ -21,7 +21,7 @@ class SimilarityCalculator:
21
  ) -> Dict[str, Any]:
22
  if filtered_data.empty:
23
  return {
24
- "status": "โš ๏ธ No results found with current filters.",
25
  "results": [],
26
  "search_time": 0,
27
  "total_candidates": 0,
@@ -30,7 +30,6 @@ class SimilarityCalculator:
30
  start_time = time.time()
31
  positive_themes = features.positive_themes
32
  negative_themes = features.negative_themes
33
- print(f"๐Ÿ” Calculating similarity for query: {positive_themes}")
34
 
35
  positive_query_embeddings_np = self.model.encode(
36
  positive_themes, convert_to_numpy=True
@@ -78,18 +77,9 @@ class SimilarityCalculator:
78
  else:
79
  combined_embedding = avg_positive
80
 
81
- print("Positive query embedding", avg_positive)
82
-
83
  similarities = self.model.similarity(combined_embedding, document_embeddings)
84
  similarities = similarities[0]
85
 
86
- print("Magnitude of avg_positive:", torch.norm(avg_positive))
87
- if negative_themes is not None and len(negative_themes) > 0:
88
- print("Magnitude of avg_negative:", torch.norm(avg_negative))
89
- print("Magnitude of combined_embedding:", torch.norm(combined_embedding))
90
- print("Mean:", similarities.mean())
91
- print("Max:", similarities.max())
92
- print("Std:", similarities.std())
93
  quality_config = QUALITY_LEVELS.get(features.quality_level, {})
94
  rating_weight = quality_config.get("rating_weight")
95
  hybrid_scores = self._calculate_hybrid_score(
@@ -133,7 +123,7 @@ class SimilarityCalculator:
133
  search_time = end_time - start_time
134
 
135
  return {
136
- "status": "โœ… Search completed successfully.",
137
  "results": results,
138
  "search_time": search_time,
139
  "total_candidates": len(filtered_data),
 
21
  ) -> Dict[str, Any]:
22
  if filtered_data.empty:
23
  return {
24
+ "status": "No results found with current filters.",
25
  "results": [],
26
  "search_time": 0,
27
  "total_candidates": 0,
 
30
  start_time = time.time()
31
  positive_themes = features.positive_themes
32
  negative_themes = features.negative_themes
 
33
 
34
  positive_query_embeddings_np = self.model.encode(
35
  positive_themes, convert_to_numpy=True
 
77
  else:
78
  combined_embedding = avg_positive
79
 
 
 
80
  similarities = self.model.similarity(combined_embedding, document_embeddings)
81
  similarities = similarities[0]
82
 
 
 
 
 
 
 
 
83
  quality_config = QUALITY_LEVELS.get(features.quality_level, {})
84
  rating_weight = quality_config.get("rating_weight")
85
  hybrid_scores = self._calculate_hybrid_score(
 
123
  search_time = end_time - start_time
124
 
125
  return {
126
+ "status": "Search completed successfully.",
127
  "results": results,
128
  "search_time": search_time,
129
  "total_candidates": len(filtered_data),
models/recommendation_engine.py CHANGED
@@ -22,85 +22,59 @@ class RecommendationEngine:
22
  self.similarity_calc = SimilarityCalculator(self.model)
23
  self.filter = MovieFilter()
24
 
25
- print(f"โœ… Recommendation engine initialized with {len(self.data)} items.")
26
-
27
  def get_recommendations(self, user_query: str, top_k: int = 40):
28
- print(f"๐Ÿš€ Starting recommendation process for query: '{user_query}'")
29
  if not user_query.strip():
30
- return "โš ๏ธ Please enter some text.", None
31
 
32
  try:
33
- print("๐Ÿ“ Parsing user query...")
34
  start_time = time.time()
35
  features = self._parse_user_query(user_query)
36
- parse_time = time.time() - start_time
37
- print(f"โœ… Query parsed in {parse_time:.4f} seconds")
38
-
39
- print("๐Ÿ” Applying filters...")
40
- start_time = time.time()
41
  filtered_data = self.filter.apply_filters(self.data, features)
42
- filter_time = time.time() - start_time
43
- print(f"โœ… Filters applied in {filter_time:.4f} seconds")
44
- print(f"๐Ÿ” Filtered data contains {len(filtered_data)} items.")
45
- print("๐Ÿ”ง Preparing query input...")
46
- print(
47
- f"๐Ÿ“ Query text for embedding: Positive ['{features.positive_themes}'], Negative [{features.negative_themes}]"
48
- )
49
- print("๐Ÿงฎ Starting similarity calculation...")
50
- start_time = time.time()
51
  try:
52
  search_results = self.similarity_calc.calculate_similarity(
53
  features, filtered_data, top_k
54
  )
55
- similarity_time = time.time() - start_time
56
- print(
57
- f"โœ… Similarity calculation completed in {similarity_time:.4f} seconds"
58
- )
59
-
60
  except Exception as similarity_error:
61
- print(f"โŒ Error in similarity calculation: {str(similarity_error)}")
62
- print(f"๐Ÿ“Š Traceback: {traceback.format_exc()}")
63
 
64
- print("๐Ÿ”„ Attempting recovery with smaller dataset...")
65
  if len(filtered_data) > 1000:
66
  smaller_data = filtered_data.sample(n=1000, random_state=42)
67
  search_results = self.similarity_calc.calculate_similarity(
68
  features, smaller_data, top_k
69
  )
70
- print("โœ… Recovery successful with smaller dataset")
71
  else:
72
  raise similarity_error
73
 
74
- print(f"๐Ÿ” Found {len(search_results['results'])} results.")
75
 
76
- print("๐Ÿ“Š Creating results dataframe...")
77
- start_time = time.time()
78
  results_df = self._create_results_dataframe(search_results)
79
- df_time = time.time() - start_time
80
- print(f"โœ… Dataframe created in {df_time:.4f} seconds")
81
-
82
- print("๐ŸŽ‰ Recommendation process completed successfully!")
83
  return features.prompt_title, results_df
84
 
85
  except Exception as e:
86
- print(f"โŒ Critical error in recommendation process: {str(e)}")
87
- print(f"๐Ÿ“Š Full traceback: {traceback.format_exc()}")
88
- print(f"๐Ÿ” Exception type: {type(e).__name__}")
89
 
90
  try:
91
  import psutil
92
 
93
  process = psutil.Process()
94
  memory_usage = process.memory_info().rss / 1024 / 1024
95
- print(f"๐Ÿ’พ Current memory usage: {memory_usage:.2f} MB")
96
  except:
97
  pass
98
 
99
- return f"โŒ Error: {str(e)}", None
100
 
101
  def _parse_user_query(self, query: str) -> Features:
102
  try:
103
- print(f"๐Ÿ“ค Sending query to OpenAI: '{query}'")
104
  response = self.client.beta.chat.completions.parse(
105
  model="gpt-4o",
106
  messages=[
@@ -315,19 +289,16 @@ class RecommendationEngine:
315
  )
316
 
317
  response_model = response.choices[0].message.parsed
318
- print(f"๐Ÿ“ฅ OpenAI response received successfully")
319
- print(f"๐Ÿ” Response type: {type(response_model)}")
320
- print(f"๐Ÿ“‹ Response content: {response_model.model_dump_json(indent=2)}")
321
  return response_model
322
  except Exception as e:
323
- print(f"โŒ Error parsing user query: {str(e)}")
324
- print(f"๐Ÿ“Š Parse error traceback: {traceback.format_exc()}")
325
  return Features(
326
  movie_or_series="both",
327
  genres=[],
328
  quality_level="any",
329
  themes=[query],
330
- date_range=[2000, 2025],
331
  negative_keywords=[],
332
  production_region=[],
333
  )
 
22
  self.similarity_calc = SimilarityCalculator(self.model)
23
  self.filter = MovieFilter()
24
 
 
 
25
  def get_recommendations(self, user_query: str, top_k: int = 40):
26
+ print(f"Starting recommendation process for query: '{user_query}'")
27
  if not user_query.strip():
28
+ return "Please enter some text.", None
29
 
30
  try:
 
31
  start_time = time.time()
32
  features = self._parse_user_query(user_query)
 
 
 
 
 
33
  filtered_data = self.filter.apply_filters(self.data, features)
34
+
 
 
 
 
 
 
 
 
35
  try:
36
  search_results = self.similarity_calc.calculate_similarity(
37
  features, filtered_data, top_k
38
  )
 
 
 
 
 
39
  except Exception as similarity_error:
40
+ print(f"Error in similarity calculation: {str(similarity_error)}")
41
+ print(f"Traceback: {traceback.format_exc()}")
42
 
43
+ print("Attempting recovery with smaller dataset...")
44
  if len(filtered_data) > 1000:
45
  smaller_data = filtered_data.sample(n=1000, random_state=42)
46
  search_results = self.similarity_calc.calculate_similarity(
47
  features, smaller_data, top_k
48
  )
49
+ print("Recovery successful with smaller dataset")
50
  else:
51
  raise similarity_error
52
 
53
+ print(f"Found {len(search_results['results'])} results.")
54
 
 
 
55
  results_df = self._create_results_dataframe(search_results)
56
+ total_time = time.time() - start_time
57
+ print(f"Recommendation finished in {total_time:.4f} seconds")
 
 
58
  return features.prompt_title, results_df
59
 
60
  except Exception as e:
61
+ print(f"Critical error in recommendation process: {str(e)}")
62
+ print(f"Full traceback: {traceback.format_exc()}")
63
+ print(f"Exception type: {type(e).__name__}")
64
 
65
  try:
66
  import psutil
67
 
68
  process = psutil.Process()
69
  memory_usage = process.memory_info().rss / 1024 / 1024
70
+ print(f"Current memory usage: {memory_usage:.2f} MB")
71
  except:
72
  pass
73
 
74
+ return f"Error: {str(e)}", None
75
 
76
  def _parse_user_query(self, query: str) -> Features:
77
  try:
 
78
  response = self.client.beta.chat.completions.parse(
79
  model="gpt-4o",
80
  messages=[
 
289
  )
290
 
291
  response_model = response.choices[0].message.parsed
292
+ print(f"Response content: {response_model.model_dump_json(indent=2)}")
 
 
293
  return response_model
294
  except Exception as e:
295
+ print(f"Parse error traceback: {traceback.format_exc()}")
 
296
  return Features(
297
  movie_or_series="both",
298
  genres=[],
299
  quality_level="any",
300
  themes=[query],
301
+ date_range=[1900, 2025],
302
  negative_keywords=[],
303
  production_region=[],
304
  )