AptlyDigital committed on
Commit
818bbf9
·
verified ·
1 Parent(s): f715c6e

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +67 -0
utils.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for SEA Prep Tutor
3
+ """
4
+ import re
5
+ import json
6
+ from typing import List, Dict, Optional
7
+
8
def clean_text(text: str) -> str:
    """Clean and normalize text.

    Removes every character that is not a word character, whitespace, or
    one of the basic punctuation marks ``. ? ! , ; : - ( )``, then
    collapses each run of whitespace into a single space and trims both
    ends.

    Args:
        text: The raw text to normalize.

    Returns:
        The cleaned text, with no leading/trailing whitespace and no
        consecutive whitespace characters.
    """
    # Strip disallowed characters FIRST: removing a symbol that sits
    # between two spaces ("a @ b") would otherwise leave a double space
    # behind if whitespace had already been collapsed.
    text = re.sub(r'[^\w\s\.\?\!,;:\-\(\)]', '', text)
    # Collapse any run of whitespace (spaces, tabs, newlines) to one space.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
15
+
16
def parse_mcq_options(text: str) -> Optional[Dict]:
    """Parse MCQ options from text.

    Recognizes option markers made of a letter A-D (either case) followed
    by ``)`` or ``.``, e.g. ``A) ...``, ``b. ...`` or ``(C) ...``. The
    text after a marker, up to the next marker or the end of the input,
    becomes that option's content.

    Args:
        text: Text that may contain a question stem followed by options.

    Returns:
        A dict mapping the upper-cased option letter to its stripped
        content, or ``None`` when the input is empty or no option is
        found. If a letter occurs more than once, the last occurrence
        wins.
    """
    if not text:
        return None

    # Marker: a lone letter A-D followed by ")" or "."; the look-behind
    # keeps it from matching the tail of a word such as "Canada.".
    # Content: matched lazily with ".+?" so it may contain ANY character
    # (including the letters a-d) and stops at the next marker or at the
    # end of the text.
    pattern = r'(?<!\w)([A-D])[\)\.]\s*(.+?)(?=\s+\(?[A-D][\)\.]|\s*$)'

    options = {}
    for letter, content in re.findall(pattern, text, re.IGNORECASE | re.DOTALL):
        content = content.strip()
        # NOTE(review): single-character contents are discarded, as in the
        # original implementation -- presumably to filter noise; confirm
        # this is intended for one-digit answers such as "A) 5".
        if len(content) > 1:
            options[letter.upper()] = content

    return options or None
30
+
31
def calculate_difficulty(text: str, subject: str) -> int:
    """Calculate question difficulty (1-5).

    The score is a heuristic built from the length of the text: one point
    per 20 words plus one point per 2 sentences, capped at 5. When
    ``subject`` is exactly ``"Math"``, up to 2 more points are added based
    on how many numbers and operator symbols the text contains. The final
    value is clamped to the range 1-5.

    Args:
        text: The question text.
        subject: The subject name; only the exact value ``"Math"``
            triggers the extra numeric scoring.

    Returns:
        An integer difficulty between 1 and 5 inclusive.
    """
    word_count = len(text.split())

    # Split only on terminators that are followed by whitespace or the end
    # of the text (so the "." in "3.5" is not a sentence break) and ignore
    # empty pieces, so a trailing "?" does not count as an extra sentence.
    sentences = [
        piece for piece in re.split(r'[.!?]+(?=\s|$)', text) if piece.strip()
    ]
    sentence_count = len(sentences)

    base_score = min(5, word_count // 20 + sentence_count // 2)

    if subject == "Math":
        # Additional factors for math: a decimal such as "3.5" is counted
        # as one number, not two.
        numbers = len(re.findall(r'\d+(?:\.\d+)?', text))
        operations = len(re.findall(r'[+\-×÷=]', text))
        base_score += min(2, (numbers + operations) // 3)

    return min(5, max(1, base_score))
45
+
46
def format_test_for_display(test: Dict) -> str:
    """Format test as HTML for display.

    Builds a styled ``<div>`` containing a header (title, date, number of
    questions, total time) followed by one card per question showing its
    text, type and difficulty label.

    Every value taken from ``test`` is HTML-escaped before being inserted,
    so text such as ``Is 3 < 5?`` is displayed literally instead of being
    interpreted as markup.

    Args:
        test: Dict describing the test. Keys read here: ``title``,
            ``date``, ``questions`` (an iterable of dicts with ``text``,
            ``type`` and ``difficulty_label``) and ``total_time``. All
            keys are optional.

    Returns:
        The HTML fragment as a string.
    """
    # Local import: keeps this function self-contained without touching
    # the module-level import block.
    from html import escape

    def _esc(value) -> str:
        # str() first so non-string values (ints, None) render as before.
        return escape(str(value))

    # "or []" also covers an explicit ``"questions": None``.
    questions = test.get('questions') or []

    parts = [f"""
    <div style="padding: 20px; background: #f8f9fa; border-radius: 10px;">
        <h3>{_esc(test.get('title', 'Practice Test'))}</h3>
        <p><strong>Date:</strong> {_esc(test.get('date', ''))}</p>
        <p><strong>Questions:</strong> {len(questions)}</p>
        <p><strong>Time:</strong> {_esc(test.get('total_time', '60 minutes'))}</p>
        <hr>
    """]

    for i, q in enumerate(questions, 1):
        parts.append(f"""
        <div style="margin: 15px 0; padding: 15px; background: white; border-radius: 5px;">
            <h4>Question {i}</h4>
            <p>{_esc(q.get('text', ''))}</p>
            <p><small>Type: {_esc(q.get('type', ''))} | Difficulty: {_esc(q.get('difficulty_label', 'Medium'))}</small></p>
        </div>
        """)

    parts.append("</div>")
    # Join once instead of growing a string with += inside the loop.
    return "".join(parts)