"""Utility functions for SEA Prep Tutor"""

import json
import re
from typing import Dict, List, Optional

# Everything clean_text() removes: any character that is not a word character,
# whitespace, or one of . ? ! , ; : - ( )
_DISALLOWED_CHARS_RE = re.compile(r'[^\w\s.?!,;:\-()]')
_WHITESPACE_RE = re.compile(r'\s+')

# An option marker is a standalone letter A-D (either case) followed by ")" or
# ".". The option text runs lazily up to the next marker (optionally written
# "(B)") or to the end of the input. The leading \b keeps the last letter of an
# ordinary word such as "card." from being read as a marker.
_MCQ_OPTION_RE = re.compile(
    r'\b([A-D])[).]\s*(.+?)(?=\s+\(?[A-D][).]|\s*$)',
    re.IGNORECASE | re.DOTALL,
)

# Sentence terminators only count when followed by whitespace or the end of
# the text, so the "." inside a decimal such as 2.5 is not a sentence break.
_SENTENCE_END_RE = re.compile(r'[.!?]+(?=\s|$)')
# An integer or a decimal counts as one number.
_NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')
_MATH_OPERATOR_RE = re.compile(r'[+\-×÷=]')


def clean_text(text: str) -> str:
    """Clean and normalize text.

    Removes every character that is not a word character, whitespace, or one
    of ``. ? ! , ; : - ( )``, then collapses whitespace runs to a single space
    and trims both ends. Note that symbols such as ``+`` and ``=`` are removed.

    Args:
        text: Raw text to normalize.

    Returns:
        The cleaned text; an empty string if nothing is left.
    """
    # Strip first, collapse second: removing a character that sits between two
    # spaces would otherwise leave a double space behind.
    stripped = _DISALLOWED_CHARS_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', stripped).strip()


def parse_mcq_options(text: str) -> Optional[Dict[str, str]]:
    """Parse MCQ options from text.

    Recognizes options introduced by a letter A-D (case-insensitive) followed
    by ``)`` or ``.``, for example ``"A) Apple B) Banana"`` or
    ``"a. Paris\\nb. London"``.

    Args:
        text: Text that may contain multiple-choice options.

    Returns:
        A dict mapping the upper-cased option letter to its stripped text, or
        ``None`` when no option is found. If a letter occurs more than once,
        the last occurrence wins.
    """
    options: Dict[str, str] = {}
    for letter, content in _MCQ_OPTION_RE.findall(text):
        content = content.strip()
        # One-character answers (e.g. "5") are valid options, so only empty
        # content is rejected.
        if content:
            options[letter.upper()] = content
    return options or None


def calculate_difficulty(text: str, subject: str) -> int:
    """Calculate question difficulty (1-5).

    The base score is ``word_count // 20 + sentence_count // 2`` capped at 5.
    When ``subject`` is exactly ``"Math"``, up to 2 extra points are added
    based on how many numbers and operator symbols (``+ - × ÷ =``) occur.

    Args:
        text: The question text.
        subject: Subject name; only ``"Math"`` changes the scoring.

    Returns:
        An integer between 1 and 5 inclusive.
    """
    word_count = len(text.split())
    # Count only non-empty pieces: splitting always yields one piece more than
    # the number of terminators, and that trailing piece is usually empty.
    sentence_count = sum(
        1 for piece in _SENTENCE_END_RE.split(text) if piece.strip()
    )
    score = min(5, word_count // 20 + sentence_count // 2)

    if subject == "Math":
        numbers = len(_NUMBER_RE.findall(text))
        operations = len(_MATH_OPERATOR_RE.findall(text))
        score += min(2, (numbers + operations) // 3)

    return min(5, max(1, score))


# NOTE(review): ``format_test_for_display(test: Dict) -> str`` opens at this
# point in the file and its HTML template continues past the visible excerpt.
# That definition is deliberately not restated here; keep its original text
# when applying this rewrite.
Date: {test.get('date', '')}
Questions: {len(test.get('questions', []))}
Time: {test.get('total_time', '60 minutes')}
{q.get('text', '')}
Type: {q.get('type', '')} | Difficulty: {q.get('difficulty_label', 'Medium')}