NavyDevilDoc committed on
Commit
73edd8f
·
verified ·
1 Parent(s): 90ae931

Update src/core/QuizEngine.py

Browse files
Files changed (1) hide show
  1. src/core/QuizEngine.py +22 -14
src/core/QuizEngine.py CHANGED
@@ -31,35 +31,43 @@ class QuizEngine:
31
  user_dir = os.path.join(self.source_dir, username)
32
  if not os.path.exists(user_dir): return None
33
 
34
- # 1. Get files
35
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
36
  if not files: return None
37
 
38
- # Retry Loop: Try up to 5 times to find a "worthy" chunk
39
- for attempt in range(5):
40
  selected_file = random.choice(files)
41
  try:
42
- with open(os.path.join(user_dir, selected_file), 'r', encoding='utf-8', errors='ignore') as f:
 
43
  text = f.read()
44
 
45
- # DEFENSE 1: Aggressive Heuristic Filtering
46
- # Split by double newline (paragraphs)
47
  paragraphs = text.split('\n\n')
48
  candidates = []
 
49
  for p in paragraphs:
50
  p = p.strip()
51
- # A. Too short?
52
- if len(p) < 250: continue
53
- # B. Looks like a list item or table row? (starts with number/bullet)
54
- if p[0].isdigit() or p.startswith(('-', '*', '•')): continue
55
- # C. Looks like administrative noise?
56
  if "intentionally left blank" in p.lower(): continue
 
57
 
 
 
 
 
 
 
 
58
  candidates.append(p)
59
 
60
  if not candidates: continue
61
 
62
- # Pick a random survivor
63
  selected_context = random.choice(candidates)
64
 
65
  return {
@@ -69,10 +77,10 @@ class QuizEngine:
69
  }
70
 
71
  except Exception as e:
72
- self.logger.error(f"Error fetching context: {e}")
73
  continue
74
 
75
- return None # Failed to find good text after 5 tries
76
 
77
  def construct_question_generation_prompt(self, context_text):
78
  """
 
31
  user_dir = os.path.join(self.source_dir, username)
32
  if not os.path.exists(user_dir): return None
33
 
 
34
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
35
  if not files: return None
36
 
37
+ # Try up to 10 times to find a file with valid chunks
38
+ for attempt in range(10):
39
  selected_file = random.choice(files)
40
  try:
41
+ file_path = os.path.join(user_dir, selected_file)
42
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
43
  text = f.read()
44
 
45
+ # DEFENSE 1: Revised Filtering
46
+ # We split by double newline to get paragraphs
47
  paragraphs = text.split('\n\n')
48
  candidates = []
49
+
50
  for p in paragraphs:
51
  p = p.strip()
52
+ # A. Absolute junk filter (too short to be a question)
53
+ if len(p) < 150: continue
54
+
55
+ # B. Administrative noise filter
 
56
  if "intentionally left blank" in p.lower(): continue
57
+ if "table of contents" in p.lower(): continue
58
 
59
+ # C. The "Number" Filter (RELAXED)
60
+ # Old rule: if p[0].isdigit(): continue
61
+ # New rule: If it starts with a number but is long, KEEP IT.
62
+ # We only ban "short" numbered lines (likely headers/TOC)
63
+ if p[0].isdigit() and len(p) < 250:
64
+ continue
65
+
66
  candidates.append(p)
67
 
68
  if not candidates: continue
69
 
70
+ # Success!
71
  selected_context = random.choice(candidates)
72
 
73
  return {
 
77
  }
78
 
79
  except Exception as e:
80
+ self.logger.error(f"Error fetching context from {selected_file}: {e}")
81
  continue
82
 
83
+ return None
84
 
85
  def construct_question_generation_prompt(self, context_text):
86
  """