Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,7 +32,7 @@ class UniversalTranscriptParser:
|
|
| 32 |
'07': '7th Grade', '08': '8th Grade', 'MA': 'Middle School'
|
| 33 |
}
|
| 34 |
|
| 35 |
-
def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]:
|
| 36 |
"""Determine transcript type and parse accordingly"""
|
| 37 |
transcript_type = self._identify_transcript_type(text)
|
| 38 |
|
|
@@ -51,7 +51,7 @@ class UniversalTranscriptParser:
|
|
| 51 |
return 'doral_academy'
|
| 52 |
return 'miami_dade'
|
| 53 |
|
| 54 |
-
def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]:
|
| 55 |
"""Parse homeschool transcript format"""
|
| 56 |
courses = []
|
| 57 |
current_grade = None
|
|
@@ -176,7 +176,7 @@ class UniversalTranscriptParser:
|
|
| 176 |
'grade_level': grade_level
|
| 177 |
}
|
| 178 |
|
| 179 |
-
def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]:
|
| 180 |
"""Parse standard Miami-Dade format"""
|
| 181 |
courses = []
|
| 182 |
courses_by_grade = defaultdict(list)
|
|
@@ -230,6 +230,33 @@ class UniversalTranscriptParser:
|
|
| 230 |
'grade_level': grade_level
|
| 231 |
}
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
def extract_gpa(text, gpa_type):
|
| 234 |
pattern = rf'{gpa_type}\s*([\d\.]+)'
|
| 235 |
match = re.search(pattern, text)
|
|
|
|
| 32 |
'07': '7th Grade', '08': '8th Grade', 'MA': 'Middle School'
|
| 33 |
}
|
| 34 |
|
| 35 |
+
def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 36 |
"""Determine transcript type and parse accordingly"""
|
| 37 |
transcript_type = self._identify_transcript_type(text)
|
| 38 |
|
|
|
|
| 51 |
return 'doral_academy'
|
| 52 |
return 'miami_dade'
|
| 53 |
|
| 54 |
+
def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 55 |
"""Parse homeschool transcript format"""
|
| 56 |
courses = []
|
| 57 |
current_grade = None
|
|
|
|
| 176 |
'grade_level': grade_level
|
| 177 |
}
|
| 178 |
|
| 179 |
+
def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 180 |
"""Parse standard Miami-Dade format"""
|
| 181 |
courses = []
|
| 182 |
courses_by_grade = defaultdict(list)
|
|
|
|
| 230 |
'grade_level': grade_level
|
| 231 |
}
|
| 232 |
|
| 233 |
+
# Helper methods for pattern compilation
|
| 234 |
+
def _compile_miami_dade_patterns(self):
|
| 235 |
+
return {
|
| 236 |
+
'student': re.compile(r'Current Grade:\s*(\d+).*YOG\s*(\d{4})'),
|
| 237 |
+
'course': re.compile(
|
| 238 |
+
r'([A-Z]-[A-Za-z\s&]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([A-Z]?)\s*\|\s*([A-Z]?)\s*\|\s*([^\|]+)',
|
| 239 |
+
re.MULTILINE
|
| 240 |
+
)
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
def _compile_homeschool_patterns(self):
|
| 244 |
+
return {
|
| 245 |
+
'student': re.compile(r'Student Name:\s*(.+)\s*SSN:'),
|
| 246 |
+
'course': re.compile(
|
| 247 |
+
r'^\|?\s*([^\|]+?)\s*\|\s*([A-Z][+*]?)\s*\|\s*([^\|]+)\s*\|\s*(\d+\.?\d*)\s*\|\s*(\d+)'
|
| 248 |
+
)
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
def _compile_doral_academy_patterns(self):
|
| 252 |
+
return {
|
| 253 |
+
'student': re.compile(r'LEGAL NAME:\s*([^\n]+)'),
|
| 254 |
+
'course': re.compile(
|
| 255 |
+
r'(\d)\s+(\d{7})\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+(\d\.\d{2})\s+(\d\.\d{2})',
|
| 256 |
+
re.MULTILINE
|
| 257 |
+
)
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
def extract_gpa(text, gpa_type):
|
| 261 |
pattern = rf'{gpa_type}\s*([\d\.]+)'
|
| 262 |
match = re.search(pattern, text)
|