Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -75,7 +75,6 @@ if HF_TOKEN:
|
|
| 75 |
logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
|
| 76 |
time.sleep(2 ** attempt)
|
| 77 |
|
| 78 |
-
# ========== UTILITY FUNCTIONS ==========
|
| 79 |
class DataEncryptor:
|
| 80 |
def __init__(self, key: str):
|
| 81 |
self.cipher = Fernet(key.encode())
|
|
@@ -146,7 +145,6 @@ def remove_sensitive_info(text: str) -> str:
|
|
| 146 |
text = re.sub(pattern, replacement, text)
|
| 147 |
return text
|
| 148 |
|
| 149 |
-
# ========== LEARNING STYLE QUIZ ==========
|
| 150 |
class LearningStyleQuiz:
|
| 151 |
def __init__(self):
|
| 152 |
self.questions = [
|
|
@@ -252,7 +250,6 @@ class LearningStyleQuiz:
|
|
| 252 |
# Initialize learning style quiz
|
| 253 |
learning_style_quiz = LearningStyleQuiz()
|
| 254 |
|
| 255 |
-
# ========== ENHANCED TRANSCRIPT PARSER ==========
|
| 256 |
class EnhancedMiamiDadeTranscriptParser:
|
| 257 |
def __init__(self):
|
| 258 |
self.patterns = {
|
|
@@ -268,7 +265,7 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 268 |
re.DOTALL
|
| 269 |
),
|
| 270 |
'credits': re.compile(
|
| 271 |
-
r"\*\s+([A-Z\s]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*",
|
| 272 |
re.DOTALL
|
| 273 |
),
|
| 274 |
'course': re.compile(
|
|
@@ -291,72 +288,143 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 291 |
def parse_transcript(self, file_path: str) -> Dict:
|
| 292 |
"""Parse Miami-Dade transcript PDF with enhanced pattern matching"""
|
| 293 |
try:
|
|
|
|
| 294 |
with pdfplumber.open(file_path) as pdf:
|
| 295 |
text = "\n".join(page.extract_text() for page in pdf.pages)
|
| 296 |
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
-
|
| 302 |
except Exception as e:
|
| 303 |
logger.error(f"Error parsing transcript: {str(e)}")
|
| 304 |
raise ValueError(f"Error processing transcript: {str(e)}")
|
| 305 |
|
| 306 |
def _parse_format(self, text: str) -> Dict:
|
| 307 |
-
"""Parse the transcript format
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
def _parse_student_info(self, text: str) -> Dict:
|
| 318 |
-
"""Extract student information"""
|
| 319 |
match = self.patterns['student_info'].search(text)
|
| 320 |
if not match:
|
| 321 |
-
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
| 323 |
return {
|
| 324 |
-
'name': match.group(1).replace(',', ' ').strip(),
|
| 325 |
-
'grade': match.group(2),
|
| 326 |
-
'student_id': match.group(3),
|
| 327 |
-
'school': match.group(4).strip(),
|
| 328 |
'birth_date': self._extract_birth_date(text),
|
| 329 |
'ethnicity': self._extract_ethnicity(text)
|
| 330 |
}
|
| 331 |
|
| 332 |
def _extract_birth_date(self, text: str) -> Optional[str]:
|
| 333 |
-
"""Extract birth date from transcript"""
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
def _extract_ethnicity(self, text: str) -> Optional[str]:
|
| 338 |
-
"""Extract ethnicity information"""
|
| 339 |
-
|
| 340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
def _parse_academic_summary(self, text: str) -> Dict:
|
| 343 |
-
"""Parse academic summary section"""
|
| 344 |
-
gpa_match = self.patterns['gpa'].search(text)
|
| 345 |
-
credits_matches = self.patterns['credits'].finditer(text)
|
| 346 |
-
rank_match = self.patterns['class_rank'].search(text)
|
| 347 |
-
|
| 348 |
summary = {
|
| 349 |
-
'gpa': {
|
| 350 |
-
'district': float(gpa_match.group(1)) if gpa_match else None,
|
| 351 |
-
'state': float(gpa_match.group(2)) if gpa_match else None
|
| 352 |
-
},
|
| 353 |
'credits': {},
|
| 354 |
-
'class_rank': {
|
| 355 |
-
'percentile': int(rank_match.group(1)) if rank_match else None,
|
| 356 |
-
'class_size': int(rank_match.group(2)) if rank_match else None
|
| 357 |
-
}
|
| 358 |
}
|
| 359 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
for match in credits_matches:
|
| 361 |
subject = match.group(1).strip()
|
| 362 |
summary['credits'][subject] = {
|
|
@@ -365,28 +433,60 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 365 |
'remaining': float(match.group(4)) if match.group(4) else None
|
| 366 |
}
|
| 367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
return summary
|
| 369 |
|
| 370 |
def _parse_courses(self, text: str) -> List[Dict]:
|
| 371 |
-
"""Parse course history section"""
|
| 372 |
courses = []
|
|
|
|
|
|
|
| 373 |
for match in self.patterns['course'].finditer(text):
|
| 374 |
-
courses.append(
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
return courses
|
| 386 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
def _parse_assessments(self, text: str) -> Dict:
|
| 388 |
-
"""Parse assessment and requirement information"""
|
| 389 |
-
matches = self.patterns['assessment'].finditer(text)
|
| 390 |
assessments = {
|
| 391 |
'ela_passed_date': None,
|
| 392 |
'algebra_passed': False,
|
|
@@ -397,17 +497,22 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 397 |
}
|
| 398 |
}
|
| 399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
for match in matches:
|
| 401 |
if match.group(1): # ELA date
|
| 402 |
assessments['ela_passed_date'] = match.group(1)
|
| 403 |
elif match.group(2): # Algebra
|
| 404 |
assessments['algebra_passed'] = match.group(2) == "YES"
|
| 405 |
-
elif "BIOLOGY
|
| 406 |
assessments['biology_passed'] = True
|
| 407 |
-
elif match.group(
|
| 408 |
assessments['community_service'] = {
|
| 409 |
'met': True,
|
| 410 |
-
'hours': int(match.group(4))
|
| 411 |
}
|
| 412 |
|
| 413 |
return assessments
|
|
@@ -415,7 +520,6 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 415 |
# Initialize the enhanced parser
|
| 416 |
transcript_parser = EnhancedMiamiDadeTranscriptParser()
|
| 417 |
|
| 418 |
-
# ========== ACADEMIC ANALYZER ==========
|
| 419 |
class AcademicAnalyzer:
|
| 420 |
def __init__(self):
|
| 421 |
self.gpa_scale = {
|
|
@@ -439,12 +543,16 @@ class AcademicAnalyzer:
|
|
| 439 |
}
|
| 440 |
|
| 441 |
try:
|
|
|
|
| 442 |
if parsed_data.get('format') == 'progress_summary':
|
| 443 |
weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
|
| 444 |
unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
|
| 445 |
-
|
| 446 |
weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
|
| 447 |
unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
if weighted_gpa >= 4.5:
|
| 450 |
analysis['rating'] = 'Excellent'
|
|
@@ -502,12 +610,16 @@ class AcademicAnalyzer:
|
|
| 502 |
analysis['comparison'] += "\n\nThe small difference between your weighted and unweighted GPA suggests you might benefit from more challenging courses."
|
| 503 |
|
| 504 |
return analysis
|
| 505 |
-
except:
|
|
|
|
| 506 |
return {
|
| 507 |
'rating': 'Unknown',
|
| 508 |
-
'description': 'Could not analyze GPA',
|
| 509 |
-
'comparison': '',
|
| 510 |
-
'improvement_tips': [
|
|
|
|
|
|
|
|
|
|
| 511 |
}
|
| 512 |
|
| 513 |
def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
|
|
@@ -603,13 +715,14 @@ class AcademicAnalyzer:
|
|
| 603 |
)
|
| 604 |
|
| 605 |
return analysis
|
| 606 |
-
except:
|
|
|
|
| 607 |
return {
|
| 608 |
-
'status': 'Could not analyze graduation status',
|
| 609 |
'completion_percentage': 0,
|
| 610 |
'missing_requirements': [],
|
| 611 |
'on_track': False,
|
| 612 |
-
'timeline': ''
|
| 613 |
}
|
| 614 |
|
| 615 |
def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
|
|
@@ -678,7 +791,8 @@ class AcademicAnalyzer:
|
|
| 678 |
]
|
| 679 |
|
| 680 |
return analysis
|
| 681 |
-
except:
|
|
|
|
| 682 |
return {
|
| 683 |
'advanced_courses': 0,
|
| 684 |
'honors_courses': 0,
|
|
@@ -686,7 +800,10 @@ class AcademicAnalyzer:
|
|
| 686 |
'ib_courses': 0,
|
| 687 |
'de_courses': 0,
|
| 688 |
'rating': 'Unknown',
|
| 689 |
-
'recommendations': [
|
|
|
|
|
|
|
|
|
|
| 690 |
}
|
| 691 |
|
| 692 |
def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
|
|
@@ -782,13 +899,17 @@ class AcademicAnalyzer:
|
|
| 782 |
recommendations['improvement_areas'].append("Increase community service involvement")
|
| 783 |
|
| 784 |
return recommendations
|
| 785 |
-
except:
|
|
|
|
| 786 |
return {
|
| 787 |
-
'reach': ["Could not generate recommendations"],
|
| 788 |
'target': [],
|
| 789 |
'safety': [],
|
| 790 |
'scholarships': [],
|
| 791 |
-
'improvement_areas': [
|
|
|
|
|
|
|
|
|
|
| 792 |
}
|
| 793 |
|
| 794 |
def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
|
|
@@ -867,18 +988,30 @@ class AcademicAnalyzer:
|
|
| 867 |
])
|
| 868 |
|
| 869 |
return plan
|
| 870 |
-
except:
|
|
|
|
| 871 |
return {
|
| 872 |
-
'weekly_schedule': {'Error': ["Could not generate schedule"]},
|
| 873 |
-
'study_strategies': [
|
| 874 |
-
|
| 875 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 876 |
}
|
| 877 |
|
| 878 |
# Initialize academic analyzer
|
| 879 |
academic_analyzer = AcademicAnalyzer()
|
| 880 |
|
| 881 |
-
# ========== DATA VISUALIZER ==========
|
| 882 |
class DataVisualizer:
|
| 883 |
def __init__(self):
|
| 884 |
self.color_palette = {
|
|
@@ -1146,7 +1279,6 @@ class DataVisualizer:
|
|
| 1146 |
# Initialize visualizer
|
| 1147 |
data_visualizer = DataVisualizer()
|
| 1148 |
|
| 1149 |
-
# ========== PROFILE MANAGER ==========
|
| 1150 |
class EnhancedProfileManager:
|
| 1151 |
def __init__(self):
|
| 1152 |
self.profiles_dir = Path(PROFILES_DIR)
|
|
@@ -1353,7 +1485,6 @@ class EnhancedProfileManager:
|
|
| 1353 |
# Initialize profile manager
|
| 1354 |
profile_manager = EnhancedProfileManager()
|
| 1355 |
|
| 1356 |
-
# ========== TEACHING ASSISTANT ==========
|
| 1357 |
class EnhancedTeachingAssistant:
|
| 1358 |
def __init__(self):
|
| 1359 |
self.context_history = []
|
|
@@ -1723,7 +1854,6 @@ class EnhancedTeachingAssistant:
|
|
| 1723 |
# Initialize teaching assistant
|
| 1724 |
teaching_assistant = EnhancedTeachingAssistant()
|
| 1725 |
|
| 1726 |
-
# ========== GRADIO INTERFACE ==========
|
| 1727 |
def create_enhanced_interface():
|
| 1728 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
| 1729 |
session_token = gr.State(value=generate_session_token())
|
|
@@ -2454,5 +2584,4 @@ def create_enhanced_interface():
|
|
| 2454 |
app = create_enhanced_interface()
|
| 2455 |
|
| 2456 |
if __name__ == "__main__":
|
| 2457 |
-
app.launch(server_name="0.0.0.0", server_port=7860)
|
| 2458 |
-
|
|
|
|
| 75 |
logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
|
| 76 |
time.sleep(2 ** attempt)
|
| 77 |
|
|
|
|
| 78 |
class DataEncryptor:
|
| 79 |
def __init__(self, key: str):
|
| 80 |
self.cipher = Fernet(key.encode())
|
|
|
|
| 145 |
text = re.sub(pattern, replacement, text)
|
| 146 |
return text
|
| 147 |
|
|
|
|
| 148 |
class LearningStyleQuiz:
|
| 149 |
def __init__(self):
|
| 150 |
self.questions = [
|
|
|
|
| 250 |
# Initialize learning style quiz
|
| 251 |
learning_style_quiz = LearningStyleQuiz()
|
| 252 |
|
|
|
|
| 253 |
class EnhancedMiamiDadeTranscriptParser:
|
| 254 |
def __init__(self):
|
| 255 |
self.patterns = {
|
|
|
|
| 265 |
re.DOTALL
|
| 266 |
),
|
| 267 |
'credits': re.compile(
|
| 268 |
+
r"\*\s+([A-Z\s/]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*",
|
| 269 |
re.DOTALL
|
| 270 |
),
|
| 271 |
'course': re.compile(
|
|
|
|
| 288 |
def parse_transcript(self, file_path: str) -> Dict:
    """Parse a Miami-Dade transcript PDF into a structured dictionary.

    Text is extracted with pdfplumber first; if that yields fewer than 500
    characters the file is re-read with PyMuPDF (``fitz``). The extracted
    text is then passed to ``_parse_format``.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The dictionary produced by ``_parse_format`` for the extracted text.

    Raises:
        ValueError: If extraction or parsing fails. The original exception
            is chained as the cause.
    """
    try:
        # First try pdfplumber. extract_text() may return None for a page
        # without a text layer (depends on the pdfplumber version), which
        # would make str.join raise TypeError, so coerce it to "".
        with pdfplumber.open(file_path) as pdf:
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)

        # Fallback to PyMuPDF if text extraction is poor
        if len(text) < 500:  # If we got very little text
            # The context manager closes the document handle again.
            with fitz.open(file_path) as doc:
                text = "".join(page.get_text() for page in doc)

        # Debug: save extracted text. This is best-effort only: a read-only
        # or full filesystem must not turn a readable transcript into a
        # parse failure.
        # NOTE(review): this dumps the raw transcript text (student data)
        # into the working directory - confirm it is wanted in production.
        try:
            with open("debug_transcript.txt", "w", encoding="utf-8") as f:
                f.write(text)
        except OSError as dump_error:
            logger.warning(f"Could not write debug transcript dump: {dump_error}")

        return self._parse_format(text)
    except Exception as e:
        logger.error(f"Error parsing transcript: {str(e)}")
        raise ValueError(f"Error processing transcript: {str(e)}") from e
|
| 310 |
|
| 311 |
def _parse_format(self, text: str) -> Dict:
|
| 312 |
+
"""Parse the transcript format with improved error handling"""
|
| 313 |
+
try:
|
| 314 |
+
parsed_data = {
|
| 315 |
+
'student_info': self._parse_student_info(text),
|
| 316 |
+
'academic_summary': self._parse_academic_summary(text),
|
| 317 |
+
'course_history': self._parse_courses(text),
|
| 318 |
+
'assessments': self._parse_assessments(text),
|
| 319 |
+
'format': 'miami_dade_v2'
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
# Validate we got at least some data
|
| 323 |
+
if not parsed_data['student_info'] or not parsed_data['course_history']:
|
| 324 |
+
raise ValueError("Incomplete data extracted from transcript")
|
| 325 |
+
|
| 326 |
+
return parsed_data
|
| 327 |
+
except Exception as e:
|
| 328 |
+
logger.error(f"Format parsing error: {str(e)}")
|
| 329 |
+
return self._parse_alternative_format(text)
|
| 330 |
+
|
| 331 |
+
def _parse_alternative_format(self, text: str) -> Dict:
|
| 332 |
+
"""Fallback parser for alternative formats"""
|
| 333 |
+
try:
|
| 334 |
+
parsed_data = {
|
| 335 |
+
'student_info': {},
|
| 336 |
+
'academic_summary': {},
|
| 337 |
+
'course_history': [],
|
| 338 |
+
'assessments': {},
|
| 339 |
+
'format': 'alternative'
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
# Try to extract basic student info
|
| 343 |
+
name_match = re.search(r"NAME:\s*([A-Z]+,\s*[A-Z]+)", text)
|
| 344 |
+
if name_match:
|
| 345 |
+
parsed_data['student_info']['name'] = name_match.group(1).replace(',', ' ').strip()
|
| 346 |
+
|
| 347 |
+
# Try to extract GPA
|
| 348 |
+
gpa_match = re.search(r"GPA:\s*([\d.]+)", text)
|
| 349 |
+
if gpa_match:
|
| 350 |
+
parsed_data['academic_summary']['gpa'] = {
|
| 351 |
+
'district': float(gpa_match.group(1)),
|
| 352 |
+
'state': float(gpa_match.group(1)) # Assume same if not specified
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
return parsed_data
|
| 356 |
+
except Exception as e:
|
| 357 |
+
logger.error(f"Alternative parser failed: {str(e)}")
|
| 358 |
+
raise ValueError("Could not parse transcript in any supported format")
|
| 359 |
|
| 360 |
def _parse_student_info(self, text: str) -> Dict:
|
| 361 |
+
"""Extract student information with improved pattern matching"""
|
| 362 |
match = self.patterns['student_info'].search(text)
|
| 363 |
if not match:
|
| 364 |
+
# Try alternative patterns
|
| 365 |
+
match = re.search(r"STUDENT INFORMATION.*?NAME:\s*([^\n]+)", text, re.DOTALL)
|
| 366 |
+
if not match:
|
| 367 |
+
return {}
|
| 368 |
+
|
| 369 |
return {
|
| 370 |
+
'name': match.group(1).replace(',', ' ').strip() if match else "Unknown",
|
| 371 |
+
'grade': match.group(2) if match and len(match.groups()) > 1 else "Unknown",
|
| 372 |
+
'student_id': match.group(3) if match and len(match.groups()) > 2 else "Unknown",
|
| 373 |
+
'school': match.group(4).strip() if match and len(match.groups()) > 3 else "Unknown",
|
| 374 |
'birth_date': self._extract_birth_date(text),
|
| 375 |
'ethnicity': self._extract_ethnicity(text)
|
| 376 |
}
|
| 377 |
|
| 378 |
def _extract_birth_date(self, text: str) -> Optional[str]:
|
| 379 |
+
"""Extract birth date from transcript with multiple pattern attempts"""
|
| 380 |
+
patterns = [
|
| 381 |
+
r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})",
|
| 382 |
+
r"DOB:\s*(\d{2}/\d{2}/\d{4})",
|
| 383 |
+
r"DATE OF BIRTH:\s*([^\n]+)"
|
| 384 |
+
]
|
| 385 |
+
|
| 386 |
+
for pattern in patterns:
|
| 387 |
+
birth_match = re.search(pattern, text)
|
| 388 |
+
if birth_match:
|
| 389 |
+
return birth_match.group(1)
|
| 390 |
+
return None
|
| 391 |
|
| 392 |
def _extract_ethnicity(self, text: str) -> Optional[str]:
|
| 393 |
+
"""Extract ethnicity information with multiple pattern attempts"""
|
| 394 |
+
patterns = [
|
| 395 |
+
r"ETHNICITY:\s*([^\n]+)",
|
| 396 |
+
r"RACE/ETHNICITY:\s*([^\n]+)",
|
| 397 |
+
r"DEMOGRAPHICS.*?ETHNICITY:\s*([^\n]+)"
|
| 398 |
+
]
|
| 399 |
+
|
| 400 |
+
for pattern in patterns:
|
| 401 |
+
eth_match = re.search(pattern, text, re.DOTALL)
|
| 402 |
+
if eth_match:
|
| 403 |
+
return eth_match.group(1).strip()
|
| 404 |
+
return None
|
| 405 |
|
| 406 |
def _parse_academic_summary(self, text: str) -> Dict:
|
| 407 |
+
"""Parse academic summary section with improved error handling"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
summary = {
|
| 409 |
+
'gpa': {'district': None, 'state': None},
|
|
|
|
|
|
|
|
|
|
| 410 |
'credits': {},
|
| 411 |
+
'class_rank': {'percentile': None, 'class_size': None}
|
|
|
|
|
|
|
|
|
|
| 412 |
}
|
| 413 |
|
| 414 |
+
# Try multiple GPA patterns
|
| 415 |
+
gpa_match = self.patterns['gpa'].search(text)
|
| 416 |
+
if not gpa_match:
|
| 417 |
+
gpa_match = re.search(r"GPA.*?([\d.]+).*?([\d.]+)", text)
|
| 418 |
+
|
| 419 |
+
if gpa_match:
|
| 420 |
+
summary['gpa']['district'] = float(gpa_match.group(1))
|
| 421 |
+
summary['gpa']['state'] = float(gpa_match.group(2)) if gpa_match.group(2) else summary['gpa']['district']
|
| 422 |
+
|
| 423 |
+
# Try multiple credit patterns
|
| 424 |
+
credits_matches = self.patterns['credits'].finditer(text)
|
| 425 |
+
if not credits_matches:
|
| 426 |
+
credits_matches = re.finditer(r"([A-Z ]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", text)
|
| 427 |
+
|
| 428 |
for match in credits_matches:
|
| 429 |
subject = match.group(1).strip()
|
| 430 |
summary['credits'][subject] = {
|
|
|
|
| 433 |
'remaining': float(match.group(4)) if match.group(4) else None
|
| 434 |
}
|
| 435 |
|
| 436 |
+
# Try multiple class rank patterns
|
| 437 |
+
rank_match = self.patterns['class_rank'].search(text)
|
| 438 |
+
if not rank_match:
|
| 439 |
+
rank_match = re.search(r"RANK.*?(\d+).*?(\d+)", text)
|
| 440 |
+
|
| 441 |
+
if rank_match:
|
| 442 |
+
summary['class_rank']['percentile'] = int(rank_match.group(1))
|
| 443 |
+
summary['class_rank']['class_size'] = int(rank_match.group(2))
|
| 444 |
+
|
| 445 |
return summary
|
| 446 |
|
| 447 |
def _parse_courses(self, text: str) -> List[Dict]:
|
| 448 |
+
"""Parse course history section with improved pattern matching"""
|
| 449 |
courses = []
|
| 450 |
+
|
| 451 |
+
# Try primary pattern first
|
| 452 |
for match in self.patterns['course'].finditer(text):
|
| 453 |
+
courses.append(self._create_course_dict(match))
|
| 454 |
+
|
| 455 |
+
# If no courses found, try alternative patterns
|
| 456 |
+
if not courses:
|
| 457 |
+
alt_pattern = re.compile(
|
| 458 |
+
r"(\d{4}-\d{4})\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([\d.]+)",
|
| 459 |
+
re.DOTALL
|
| 460 |
+
)
|
| 461 |
+
for match in alt_pattern.finditer(text):
|
| 462 |
+
courses.append({
|
| 463 |
+
'term': match.group(1),
|
| 464 |
+
'course_code': match.group(2),
|
| 465 |
+
'course_title': match.group(3).strip(),
|
| 466 |
+
'subject_area': match.group(4),
|
| 467 |
+
'grade': match.group(5),
|
| 468 |
+
'credit_earned': float(match.group(6)),
|
| 469 |
+
'credit_attempted': float(match.group(6))
|
| 470 |
+
})
|
| 471 |
+
|
| 472 |
return courses
|
| 473 |
|
| 474 |
+
def _create_course_dict(self, match) -> Dict:
|
| 475 |
+
"""Create standardized course dictionary from regex match"""
|
| 476 |
+
return {
|
| 477 |
+
'term': match.group(1),
|
| 478 |
+
'course_code': match.group(2),
|
| 479 |
+
'course_title': match.group(3).strip(),
|
| 480 |
+
'subject_area': match.group(4),
|
| 481 |
+
'grade': match.group(5),
|
| 482 |
+
'flag': match.group(6),
|
| 483 |
+
'credit_status': match.group(7),
|
| 484 |
+
'credit_attempted': float(match.group(8)),
|
| 485 |
+
'credit_earned': float(match.group(9))
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
def _parse_assessments(self, text: str) -> Dict:
|
| 489 |
+
"""Parse assessment and requirement information with improved patterns"""
|
|
|
|
| 490 |
assessments = {
|
| 491 |
'ela_passed_date': None,
|
| 492 |
'algebra_passed': False,
|
|
|
|
| 497 |
}
|
| 498 |
}
|
| 499 |
|
| 500 |
+
# Try multiple assessment patterns
|
| 501 |
+
matches = self.patterns['assessment'].finditer(text)
|
| 502 |
+
if not matches:
|
| 503 |
+
matches = re.finditer(r"(ENGLISH|ALGEBRA|BIOLOGY|SERVICE).*?(PASSED|MET|YES|NO|\d{2}/\d{4})", text)
|
| 504 |
+
|
| 505 |
for match in matches:
|
| 506 |
if match.group(1): # ELA date
|
| 507 |
assessments['ela_passed_date'] = match.group(1)
|
| 508 |
elif match.group(2): # Algebra
|
| 509 |
assessments['algebra_passed'] = match.group(2) == "YES"
|
| 510 |
+
elif "BIOLOGY" in match.group(0):
|
| 511 |
assessments['biology_passed'] = True
|
| 512 |
+
elif "SERVICE" in match.group(0):
|
| 513 |
assessments['community_service'] = {
|
| 514 |
'met': True,
|
| 515 |
+
'hours': int(match.group(4)) if match.group(4) else 0
|
| 516 |
}
|
| 517 |
|
| 518 |
return assessments
|
|
|
|
| 520 |
# Initialize the enhanced parser
|
| 521 |
transcript_parser = EnhancedMiamiDadeTranscriptParser()
|
| 522 |
|
|
|
|
| 523 |
class AcademicAnalyzer:
|
| 524 |
def __init__(self):
|
| 525 |
self.gpa_scale = {
|
|
|
|
| 543 |
}
|
| 544 |
|
| 545 |
try:
|
| 546 |
+
# Handle multiple transcript formats
|
| 547 |
if parsed_data.get('format') == 'progress_summary':
|
| 548 |
weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
|
| 549 |
unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
|
| 550 |
+
elif parsed_data.get('format') == 'miami_dade_v2':
|
| 551 |
weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
|
| 552 |
unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
|
| 553 |
+
else: # Alternative format
|
| 554 |
+
weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', 0))
|
| 555 |
+
unweighted_gpa = weighted_gpa # Assume same if not specified
|
| 556 |
|
| 557 |
if weighted_gpa >= 4.5:
|
| 558 |
analysis['rating'] = 'Excellent'
|
|
|
|
| 610 |
analysis['comparison'] += "\n\nThe small difference between your weighted and unweighted GPA suggests you might benefit from more challenging courses."
|
| 611 |
|
| 612 |
return analysis
|
| 613 |
+
except Exception as e:
|
| 614 |
+
logger.error(f"GPA analysis error: {str(e)}")
|
| 615 |
return {
|
| 616 |
'rating': 'Unknown',
|
| 617 |
+
'description': 'Could not analyze GPA - data may be missing or incomplete',
|
| 618 |
+
'comparison': 'Please verify your transcript contains GPA information',
|
| 619 |
+
'improvement_tips': [
|
| 620 |
+
"Check that your transcript includes GPA information",
|
| 621 |
+
"Ensure the file is clear and all text was extracted properly"
|
| 622 |
+
]
|
| 623 |
}
|
| 624 |
|
| 625 |
def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
|
|
|
|
| 715 |
)
|
| 716 |
|
| 717 |
return analysis
|
| 718 |
+
except Exception as e:
|
| 719 |
+
logger.error(f"Graduation status error: {str(e)}")
|
| 720 |
return {
|
| 721 |
+
'status': 'Could not analyze graduation status - data may be incomplete',
|
| 722 |
'completion_percentage': 0,
|
| 723 |
'missing_requirements': [],
|
| 724 |
'on_track': False,
|
| 725 |
+
'timeline': 'Please verify your transcript contains credit information'
|
| 726 |
}
|
| 727 |
|
| 728 |
def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
|
|
|
|
| 791 |
]
|
| 792 |
|
| 793 |
return analysis
|
| 794 |
+
except Exception as e:
|
| 795 |
+
logger.error(f"Course rigor error: {str(e)}")
|
| 796 |
return {
|
| 797 |
'advanced_courses': 0,
|
| 798 |
'honors_courses': 0,
|
|
|
|
| 800 |
'ib_courses': 0,
|
| 801 |
'de_courses': 0,
|
| 802 |
'rating': 'Unknown',
|
| 803 |
+
'recommendations': [
|
| 804 |
+
"Could not analyze course rigor - verify your transcript contains course information",
|
| 805 |
+
"Check that course titles and types were properly extracted"
|
| 806 |
+
]
|
| 807 |
}
|
| 808 |
|
| 809 |
def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
|
|
|
|
| 899 |
recommendations['improvement_areas'].append("Increase community service involvement")
|
| 900 |
|
| 901 |
return recommendations
|
| 902 |
+
except Exception as e:
|
| 903 |
+
logger.error(f"College recommendations error: {str(e)}")
|
| 904 |
return {
|
| 905 |
+
'reach': ["Could not generate recommendations - insufficient data"],
|
| 906 |
'target': [],
|
| 907 |
'safety': [],
|
| 908 |
'scholarships': [],
|
| 909 |
+
'improvement_areas': [
|
| 910 |
+
"Complete your profile information",
|
| 911 |
+
"Ensure your transcript contains GPA and course information"
|
| 912 |
+
]
|
| 913 |
}
|
| 914 |
|
| 915 |
def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
|
|
|
|
| 988 |
])
|
| 989 |
|
| 990 |
return plan
|
| 991 |
+
except Exception as e:
|
| 992 |
+
logger.error(f"Study plan error: {str(e)}")
|
| 993 |
return {
|
| 994 |
+
'weekly_schedule': {'Error': ["Could not generate schedule - course data may be missing"]},
|
| 995 |
+
'study_strategies': [
|
| 996 |
+
"Review your notes regularly",
|
| 997 |
+
"Create a consistent study routine",
|
| 998 |
+
"Ask teachers for clarification when needed"
|
| 999 |
+
],
|
| 1000 |
+
'time_management_tips': [
|
| 1001 |
+
"Set aside dedicated study time each day",
|
| 1002 |
+
"Break large tasks into smaller chunks",
|
| 1003 |
+
"Use a planner to track assignments"
|
| 1004 |
+
],
|
| 1005 |
+
'resource_recommendations': [
|
| 1006 |
+
"Khan Academy",
|
| 1007 |
+
"Quizlet",
|
| 1008 |
+
"Your textbook and class materials"
|
| 1009 |
+
]
|
| 1010 |
}
|
| 1011 |
|
| 1012 |
# Initialize academic analyzer
|
| 1013 |
academic_analyzer = AcademicAnalyzer()
|
| 1014 |
|
|
|
|
| 1015 |
class DataVisualizer:
|
| 1016 |
def __init__(self):
|
| 1017 |
self.color_palette = {
|
|
|
|
| 1279 |
# Initialize visualizer
|
| 1280 |
data_visualizer = DataVisualizer()
|
| 1281 |
|
|
|
|
| 1282 |
class EnhancedProfileManager:
|
| 1283 |
def __init__(self):
|
| 1284 |
self.profiles_dir = Path(PROFILES_DIR)
|
|
|
|
| 1485 |
# Initialize profile manager
|
| 1486 |
profile_manager = EnhancedProfileManager()
|
| 1487 |
|
|
|
|
| 1488 |
class EnhancedTeachingAssistant:
|
| 1489 |
def __init__(self):
|
| 1490 |
self.context_history = []
|
|
|
|
| 1854 |
# Initialize teaching assistant
|
| 1855 |
teaching_assistant = EnhancedTeachingAssistant()
|
| 1856 |
|
|
|
|
| 1857 |
def create_enhanced_interface():
|
| 1858 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
| 1859 |
session_token = gr.State(value=generate_session_token())
|
|
|
|
| 2584 |
app = create_enhanced_interface()
|
| 2585 |
|
| 2586 |
if __name__ == "__main__":
|
| 2587 |
+
app.launch(server_name="0.0.0.0", server_port=7860)
|
|
|