Danialebrat committed on
Commit
e6f81c2
·
1 Parent(s): c309583

- updating banned list of contents

Browse files

- changing selection logic, following the logic that we have in production

Config_files/message_system_config.json CHANGED
@@ -28,7 +28,8 @@
28
  "ollama_models": ["deepseek-r1:1.5b", "gemma3:4b", "deepseek-r1:7b", "gemma3:4b"],
29
  "claude_models": ["claude-3-5-haiku-latest"],
30
  "inference_models": ["google/gemma-3-27b-instruct/bf-16", "meta-llama/llama-3.2-11b-instruct/fp-16"],
31
- "google_models": ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.0-flash"]
 
32
  }
33
 
34
 
 
28
  "ollama_models": ["deepseek-r1:1.5b", "gemma3:4b", "deepseek-r1:7b", "gemma3:4b"],
29
  "claude_models": ["claude-3-5-haiku-latest"],
30
  "inference_models": ["google/gemma-3-27b-instruct/bf-16", "meta-llama/llama-3.2-11b-instruct/fp-16"],
31
+ "google_models": ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.0-flash"],
32
+ "banned_contents": [373883, 358813, 301039, 377366]
33
  }
34
 
35
 
Messaging_system/LLMR.py CHANGED
@@ -401,59 +401,119 @@ You are a helpful educational music content recommender. Your goal is to choose
401
  # ==========================================================================
402
  # Randomly select recommendations from top options
403
  # ==========================================================================
404
- # main random selector ---
405
  def _get_recommendation_random(self):
406
  """
407
- Randomly pick ONE item from the top-5 of each requested section.
 
408
  Also remove the picked item from every section in recsys_json.
409
- Returns: (content_id, content_info, updated_recsys_json, zero_tokens_dict)
410
  """
411
- # 1) Get user's recsys_result or fall back
 
 
412
  recsys_json = self._get_user_recommendation()
413
  try:
414
  recsys_data = json.loads(recsys_json) if recsys_json else {}
415
  except Exception:
416
  recsys_data = {}
417
 
418
- # 2) Build candidate pool (top 5 per section)
419
  sections = self.Core.recsys_contents
420
- candidates = self._collect_top_k(recsys_data, sections, k=5)
 
 
421
 
422
  # 3) Cold start or empty? -> use popular contents
423
- if not candidates:
 
424
  recsys_data = self._get_popular_fallback_json(k=5)
425
- candidates = self._collect_top_k(recsys_data, sections, k=5)
 
426
 
427
  # Still nothing? bail out
428
- if not candidates:
429
  return None, None, None, None
430
 
431
- # 4) Deduplicate by content_id, then pick randomly
432
- seen = set()
433
- unique_candidates = []
434
- for rec in candidates:
435
- cid = rec.get("content_id")
436
- if cid not in seen:
437
- seen.add(cid)
438
- unique_candidates.append(rec)
439
 
440
- picked_rec = random.choice(unique_candidates)
441
- picked_id = picked_rec["content_id"]
442
- recommendation_dict = self._get_recommendation_info(picked_id, recsys_data)
443
 
444
- # 5) Remove picked_id from ALL sections and store back
445
- recsys_data = self._remove_selected_from_all(recsys_data, picked_id)
 
 
 
 
 
 
 
 
 
 
 
446
 
 
 
447
 
448
- # 6) Track available ids if you still need it elsewhere
449
- self.selected_content_ids = [r["content_id"] for r in unique_candidates]
450
 
451
- # 7) Prepare return values
452
- content_info = self._get_content_info(picked_id)
453
  updated_json = json.dumps(recsys_data)
454
  zero_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
455
 
456
  return recommendation_dict, content_info, updated_json, zero_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  #======================================================================
458
  # helpers used by the random path
459
  #======================================================================
 
401
  # ==========================================================================
402
  # Randomly select recommendations from top options
403
  # ==========================================================================
 
404
  def _get_recommendation_random(self):
405
  """
406
+ Randomly pick ONE valid item from the top-5 of each requested section.
407
+ If the first random pick is missing/invalid, keep trying other candidates.
408
  Also remove the picked item from every section in recsys_json.
409
+ Returns: (recommendation_dict, content_info, updated_recsys_json, zero_tokens_dict)
410
  """
411
+ import json, random
412
+
413
+ # 1) Get user's recsys_result or fall back to {}
414
  recsys_json = self._get_user_recommendation()
415
  try:
416
  recsys_data = json.loads(recsys_json) if recsys_json else {}
417
  except Exception:
418
  recsys_data = {}
419
 
 
420
  sections = self.Core.recsys_contents
421
+
422
+ # 2) Primary candidate set
423
+ unique_candidates = self.build_unique_candidates(recsys_data, sections)
424
 
425
  # 3) Cold start or empty? -> use popular contents
426
+ used_popular_fallback = False
427
+ if not unique_candidates:
428
  recsys_data = self._get_popular_fallback_json(k=5)
429
+ unique_candidates = self.build_unique_candidates(recsys_data, sections)
430
+ used_popular_fallback = True
431
 
432
  # Still nothing? bail out
433
+ if not unique_candidates:
434
  return None, None, None, None
435
 
436
+ # 4) Try candidates in random order until a valid one is found
437
+ idxs = list(range(len(unique_candidates)))
438
+ random.shuffle(idxs)
 
 
 
 
 
439
 
440
+ picked_id, recommendation_dict, content_info = self.try_pick_from_candidates(idxs, unique_candidates,
441
+ recsys_data)
 
442
 
443
+ # 5) If nothing valid from primary set, and we haven't tried popular fallback yet, try it now
444
+ if picked_id is None and not used_popular_fallback:
445
+ recsys_data = self._get_popular_fallback_json(k=5)
446
+ unique_candidates = self.build_unique_candidates(recsys_data, sections)
447
+ if unique_candidates:
448
+ idxs = list(range(len(unique_candidates)))
449
+ random.shuffle(idxs)
450
+ picked_id, recommendation_dict, content_info = self.try_pick_from_candidates(idxs, unique_candidates,
451
+ recsys_data)
452
+
453
+ # 6) If still nothing, bail out
454
+ if picked_id is None:
455
+ return None, None, None, None
456
 
457
+ # 7) Remove picked_id from ALL sections and store back
458
+ recsys_data = self._remove_selected_from_all(recsys_data, picked_id)
459
 
460
+ # 8) Track available ids if you still need it elsewhere
461
+ self.selected_content_ids = [r["content_id"] for r in unique_candidates if r.get("content_id")]
462
 
463
+ # 9) Prepare return values
 
464
  updated_json = json.dumps(recsys_data)
465
  zero_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
466
 
467
  return recommendation_dict, content_info, updated_json, zero_tokens
468
+
469
+ # ====================================================================
470
def build_unique_candidates(self, src_data, sections, k=5):
    """
    Build the candidate pool and de-duplicate it by ``content_id``.

    Args:
        src_data: recommendation data passed through to ``_collect_top_k``.
        sections: sections to collect candidates from.
        k: how many top items to request per section (default 5, the value
            that was previously hard-coded).

    Returns:
        A list of candidate records in first-seen order. Records with a
        missing or falsy ``content_id`` are dropped, and only the first
        record for each ``content_id`` is kept. Returns ``[]`` when
        ``_collect_top_k`` yields nothing.
    """
    cands = self._collect_top_k(src_data, sections, k=k)
    seen, uniq = set(), []
    for rec in cands or []:
        cid = rec.get("content_id")
        if cid and cid not in seen:
            seen.add(cid)
            uniq.append(rec)
    return uniq
480
+
481
+ # ======================================================================
482
def try_pick_from_candidates(self, idxs, candidates, source_data):
    """
    Iterate candidates in the given index order and return the first valid pick.

    A candidate is skipped when:
      * it has no (or a falsy) ``content_id``;
      * its ``content_id`` is listed under ``banned_contents`` in the config;
      * ``_get_content_info`` returns a falsy value;
      * fetching either info payload raises an exception.

    Args:
        idxs: indices into ``candidates``, in the order they should be tried.
        candidates: list of candidate records (dict-like, read via ``.get``).
        source_data: recommendation data forwarded to ``_get_recommendation_info``.

    Returns:
        (picked_id, recommendation_dict, content_info), or (None, None, None)
        when no candidate is valid.
    """
    import logging

    # `or []` keeps a null/absent config entry from crashing set(None)
    configured = self.Core.config_file.get("banned_contents") or []
    banned_contents = set(configured)  # set for O(1) membership tests
    # NOTE(review): ids may arrive as strings while the config stores ints —
    # also compare on the string form so such ids cannot bypass the ban list.
    banned_as_text = {str(cid) for cid in configured}

    for i in idxs:
        picked_id = candidates[i].get("content_id")
        if not picked_id:
            continue
        # Skip if content is banned
        if picked_id in banned_contents or str(picked_id) in banned_as_text:
            continue
        try:
            # Validate we can fetch both info payloads
            content_info = self._get_content_info(picked_id)
            if not content_info:
                # Treat falsy/empty as invalid and keep searching
                continue
            recommendation_dict = self._get_recommendation_info(picked_id, source_data)
        except Exception as exc:
            # Best-effort selection: any data issue (IndexError, KeyError, ...)
            # disqualifies this candidate only; record it and try the next one.
            logging.getLogger(__name__).warning(
                "Skipping candidate %s: %r", picked_id, exc
            )
            continue
        # Both lookups succeeded: we have a winner
        return picked_id, recommendation_dict, content_info
    return None, None, None
517
  #======================================================================
518
  # helpers used by the random path
519
  #======================================================================