Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,6 +20,10 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 20 |
import time
|
| 21 |
import logging
|
| 22 |
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ========== CONFIGURATION ==========
|
| 25 |
PROFILES_DIR = "student_profiles"
|
|
@@ -29,6 +33,7 @@ MIN_AGE = 5
|
|
| 29 |
MAX_AGE = 120
|
| 30 |
SESSION_TOKEN_LENGTH = 32
|
| 31 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
| 32 |
|
| 33 |
# Initialize logging
|
| 34 |
logging.basicConfig(
|
|
@@ -48,6 +53,14 @@ if HF_TOKEN:
|
|
| 48 |
except Exception as e:
|
| 49 |
logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# ========== MODEL LOADER ==========
|
| 52 |
class ModelLoader:
|
| 53 |
def __init__(self):
|
|
@@ -133,7 +146,7 @@ def generate_session_token() -> str:
|
|
| 133 |
|
| 134 |
def sanitize_input(text: str) -> str:
|
| 135 |
"""Sanitize user input to prevent XSS and injection attacks."""
|
| 136 |
-
return
|
| 137 |
|
| 138 |
def validate_name(name: str) -> str:
|
| 139 |
"""Validate name input."""
|
|
@@ -273,43 +286,134 @@ class TranscriptParser:
|
|
| 273 |
self.current_courses = []
|
| 274 |
self.course_history = []
|
| 275 |
self.graduation_status = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
def parse_transcript(self, text: str) -> Dict:
|
| 278 |
-
"""Enhanced parsing method
|
| 279 |
try:
|
| 280 |
# First normalize the text (replace multiple spaces, normalize line breaks)
|
| 281 |
text = re.sub(r'\s+', ' ', text)
|
| 282 |
|
| 283 |
-
#
|
| 284 |
-
self.
|
| 285 |
-
|
| 286 |
-
# Extract requirements with better table parsing
|
| 287 |
-
self._extract_requirements(text)
|
| 288 |
|
| 289 |
-
#
|
| 290 |
-
self.
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
# Calculate completion status
|
| 296 |
-
self._calculate_completion()
|
| 297 |
-
|
| 298 |
-
return {
|
| 299 |
-
"student_info": self.student_data,
|
| 300 |
-
"requirements": self.requirements,
|
| 301 |
-
"current_courses": self.current_courses,
|
| 302 |
-
"course_history": self.course_history,
|
| 303 |
-
"graduation_status": self.graduation_status
|
| 304 |
-
}
|
| 305 |
|
| 306 |
except Exception as e:
|
| 307 |
logging.error(f"Error parsing transcript: {str(e)}")
|
| 308 |
-
raise gr.Error(f"Error parsing transcript: {str(e)}\n\nThis may be due to an unsupported transcript format. Please ensure you're uploading an official
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
def _extract_student_info(self, text: str):
|
| 311 |
"""Enhanced student info extraction for Miami-Dade format"""
|
| 312 |
-
# Extract basic student info
|
| 313 |
student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
|
| 314 |
student_match = re.search(student_pattern, text, re.IGNORECASE)
|
| 315 |
|
|
@@ -488,7 +592,7 @@ class TranscriptParser:
|
|
| 488 |
}, indent=2)
|
| 489 |
|
| 490 |
def format_transcript_output(data: Dict) -> str:
|
| 491 |
-
"""Enhanced formatting for
|
| 492 |
output = []
|
| 493 |
|
| 494 |
# Student Info Section
|
|
@@ -498,30 +602,42 @@ def format_transcript_output(data: Dict) -> str:
|
|
| 498 |
output.append(f"**Student ID:** {student.get('id', 'Unknown')}")
|
| 499 |
output.append(f"**Current Grade:** {student.get('current_grade', 'Unknown')}")
|
| 500 |
output.append(f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}")
|
| 501 |
-
output.append(f"**Unweighted GPA:** {student.get('unweighted_gpa', 'N/A')}")
|
| 502 |
-
output.append(f"**Weighted GPA:** {student.get('weighted_gpa', 'N/A')}")
|
| 503 |
-
output.append(f"**Total Credits Earned:** {student.get('total_credits', 'N/A')}")
|
| 504 |
-
output.append(f"**Community Service Hours:** {student.get('community_service_hours', 'N/A')}\n")
|
| 505 |
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
-
# Detailed Requirements
|
| 516 |
-
output.append("### Detailed Requirements:")
|
| 517 |
-
for code, req in data.get("requirements", {}).items():
|
| 518 |
-
output.append(
|
| 519 |
-
f"- **{code}**: {req.get('description', '')}\n"
|
| 520 |
-
f" Required: {req['required']} | Completed: {req['completed']} | "
|
| 521 |
-
f"Status: {req['status']}"
|
| 522 |
-
)
|
| 523 |
output.append("")
|
| 524 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
# Current Courses
|
| 526 |
if data.get("current_courses"):
|
| 527 |
output.append("## Current Courses (In Progress)\n" + '='*50)
|
|
@@ -537,7 +653,8 @@ def format_transcript_output(data: Dict) -> str:
|
|
| 537 |
# Course History by Year
|
| 538 |
courses_by_year = defaultdict(list)
|
| 539 |
for course in data.get("course_history", []):
|
| 540 |
-
|
|
|
|
| 541 |
|
| 542 |
if courses_by_year:
|
| 543 |
output.append("## Course History\n" + '='*50)
|
|
@@ -545,9 +662,10 @@ def format_transcript_output(data: Dict) -> str:
|
|
| 545 |
output.append(f"\n### {year}")
|
| 546 |
for course in courses_by_year[year]:
|
| 547 |
output.append(
|
| 548 |
-
f"- **{course
|
| 549 |
-
f"
|
| 550 |
-
f"
|
|
|
|
| 551 |
)
|
| 552 |
|
| 553 |
return '\n'.join(output)
|
|
@@ -603,7 +721,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 603 |
if progress:
|
| 604 |
progress(0.1, desc="Processing transcript with AI...")
|
| 605 |
|
| 606 |
-
model, tokenizer =
|
| 607 |
if model is None or tokenizer is None:
|
| 608 |
raise gr.Error(f"Model failed to load. {model_loader.error or 'Please try loading a model first.'}")
|
| 609 |
|
|
@@ -643,6 +761,11 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 643 |
logging.error(f"AI parsing error: {str(e)}")
|
| 644 |
raise gr.Error(f"Error processing transcript: {str(e)}\n\nPlease try again or contact support with this error message.")
|
| 645 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
| 647 |
"""Main function to parse transcript files with better error handling"""
|
| 648 |
try:
|
|
@@ -883,7 +1006,9 @@ class ProfileManager:
|
|
| 883 |
def get_profile_path(self, name: str) -> Path:
|
| 884 |
"""Get profile path with session token if available."""
|
| 885 |
if self.current_session:
|
| 886 |
-
|
|
|
|
|
|
|
| 887 |
return self.profiles_dir / f"{name.replace(' ', '_')}_profile.json"
|
| 888 |
|
| 889 |
def save_profile(self, name: str, age: Union[int, str], interests: str,
|
|
@@ -919,7 +1044,8 @@ class ProfileManager:
|
|
| 919 |
"learning_style": learning_style if learning_style else "Not assessed",
|
| 920 |
"favorites": favorites,
|
| 921 |
"blog": sanitize_input(blog) if blog else "",
|
| 922 |
-
"session_token": self.current_session
|
|
|
|
| 923 |
}
|
| 924 |
|
| 925 |
# Save to JSON file
|
|
@@ -959,12 +1085,12 @@ class ProfileManager:
|
|
| 959 |
return {}
|
| 960 |
|
| 961 |
if name:
|
| 962 |
-
# Find profile by name
|
| 963 |
-
|
| 964 |
if session_token:
|
| 965 |
-
profile_file = self.profiles_dir / f"{
|
| 966 |
else:
|
| 967 |
-
profile_file = self.profiles_dir / f"{
|
| 968 |
|
| 969 |
if not profile_file.exists():
|
| 970 |
# Try loading from HF Hub
|
|
@@ -985,7 +1111,11 @@ class ProfileManager:
|
|
| 985 |
profile_file = profiles[0]
|
| 986 |
|
| 987 |
with open(profile_file, "r", encoding='utf-8') as f:
|
| 988 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
|
| 990 |
except Exception as e:
|
| 991 |
logging.error(f"Error loading profile: {str(e)}")
|
|
@@ -1001,10 +1131,12 @@ class ProfileManager:
|
|
| 1001 |
# Extract just the name part (without session token)
|
| 1002 |
profile_names = []
|
| 1003 |
for p in profiles:
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
|
|
|
|
|
|
| 1008 |
|
| 1009 |
return profile_names
|
| 1010 |
|
|
@@ -1071,7 +1203,7 @@ class TeachingAssistant:
|
|
| 1071 |
self.context_history = []
|
| 1072 |
self.max_context_length = 5 # Keep last 5 exchanges for context
|
| 1073 |
|
| 1074 |
-
def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
|
| 1075 |
"""Generate personalized response based on student profile and context."""
|
| 1076 |
try:
|
| 1077 |
# Load profile with session token
|
|
@@ -1092,7 +1224,7 @@ class TeachingAssistant:
|
|
| 1092 |
favorites = profile.get("favorites", {})
|
| 1093 |
|
| 1094 |
# Process message with context
|
| 1095 |
-
response = self._process_message(message, profile)
|
| 1096 |
|
| 1097 |
# Add follow-up suggestions
|
| 1098 |
if "study" in message.lower() or "learn" in message.lower():
|
|
@@ -1119,7 +1251,7 @@ class TeachingAssistant:
|
|
| 1119 |
# Trim to maintain max context length
|
| 1120 |
self.context_history = self.context_history[-(self.max_context_length*2):]
|
| 1121 |
|
| 1122 |
-
def _process_message(self, message: str, profile: Dict) -> str:
|
| 1123 |
"""Process user message with profile context."""
|
| 1124 |
message_lower = message.lower()
|
| 1125 |
|
|
@@ -1323,7 +1455,7 @@ def create_interface():
|
|
| 1323 |
4: False # AI Assistant
|
| 1324 |
})
|
| 1325 |
|
| 1326 |
-
# Custom CSS
|
| 1327 |
app.css = """
|
| 1328 |
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
|
| 1329 |
.tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
|
|
@@ -1335,15 +1467,28 @@ def create_interface():
|
|
| 1335 |
.quiz-question { margin-bottom: 15px; padding: 15px; background: #f5f5f5; border-radius: 5px; }
|
| 1336 |
.quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
|
| 1337 |
.error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1338 |
"""
|
| 1339 |
|
| 1340 |
-
# Header
|
| 1341 |
-
gr.
|
| 1342 |
-
|
| 1343 |
-
|
| 1344 |
-
|
| 1345 |
-
|
| 1346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1347 |
# Navigation buttons
|
| 1348 |
with gr.Row():
|
| 1349 |
with gr.Column(scale=1, min_width=100):
|
|
@@ -1741,6 +1886,16 @@ def create_interface():
|
|
| 1741 |
outputs=[tabs, nav_message]
|
| 1742 |
)
|
| 1743 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1744 |
# Load model on startup
|
| 1745 |
app.load(fn=lambda: model_loader.load_model(), outputs=[])
|
| 1746 |
|
|
|
|
| 20 |
import time
|
| 21 |
import logging
|
| 22 |
import asyncio
|
| 23 |
+
from functools import lru_cache
|
| 24 |
+
import hashlib
|
| 25 |
+
import bleach
|
| 26 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 27 |
|
| 28 |
# ========== CONFIGURATION ==========
|
| 29 |
PROFILES_DIR = "student_profiles"
|
|
|
|
| 33 |
MAX_AGE = 120
|
| 34 |
SESSION_TOKEN_LENGTH = 32
|
| 35 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 36 |
+
SESSION_TIMEOUT = 3600 # 1 hour session timeout
|
| 37 |
|
| 38 |
# Initialize logging
|
| 39 |
logging.basicConfig(
|
|
|
|
| 53 |
except Exception as e:
|
| 54 |
logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
|
| 55 |
|
| 56 |
+
# ========== CACHING AND PERFORMANCE OPTIMIZATIONS ==========
|
| 57 |
+
executor = ThreadPoolExecutor(max_workers=4)
|
| 58 |
+
|
| 59 |
+
# Cache model loading
|
| 60 |
+
@lru_cache(maxsize=1)
def _load_model_and_tokenizer_cached():
    """Call ``model_loader.load_model()`` and memoize whatever it returns."""
    return model_loader.load_model()


def get_model_and_tokenizer():
    """Return the ``(model, tokenizer)`` pair, loading it on first use.

    A successful load is memoized so later calls reuse the same objects.
    If either element comes back as ``None`` the memoized entry is
    discarded, so the next call retries the load instead of returning the
    cached failure for the rest of the process lifetime.

    Returns:
        The two-element result of ``model_loader.load_model()``.
    """
    model, tokenizer = _load_model_and_tokenizer_cached()
    if model is None or tokenizer is None:
        # Do not let a failed load stick in the cache.
        _load_model_and_tokenizer_cached.cache_clear()
    return model, tokenizer


# Keep the lru_cache helper attributes reachable through the public name,
# as they were when the decorator was applied to it directly.
get_model_and_tokenizer.cache_clear = _load_model_and_tokenizer_cached.cache_clear
get_model_and_tokenizer.cache_info = _load_model_and_tokenizer_cached.cache_info
|
| 63 |
+
|
| 64 |
# ========== MODEL LOADER ==========
|
| 65 |
class ModelLoader:
|
| 66 |
def __init__(self):
|
|
|
|
| 146 |
|
| 147 |
def sanitize_input(text: str) -> str:
    """Trim ``text`` and pass it through ``bleach.clean`` with nothing allowed.

    Args:
        text: Raw user-supplied string.

    Returns:
        The cleaned string produced by ``bleach.clean``.
    """
    trimmed = text.strip()
    # Empty allow-lists for tags, attributes and protocols; strip=True asks
    # bleach to drop disallowed markup instead of escaping it.
    cleaned = bleach.clean(trimmed, tags=[], attributes={}, protocols=[], strip=True)
    return cleaned
|
| 150 |
|
| 151 |
def validate_name(name: str) -> str:
|
| 152 |
"""Validate name input."""
|
|
|
|
| 286 |
self.current_courses = []
|
| 287 |
self.course_history = []
|
| 288 |
self.graduation_status = {}
|
| 289 |
+
self.supported_formats = {
|
| 290 |
+
'miami_dade': self.parse_miami_dade,
|
| 291 |
+
'standard': self.parse_standard,
|
| 292 |
+
'homeschool': self.parse_homeschool
|
| 293 |
+
}
|
| 294 |
|
| 295 |
def parse_transcript(self, text: str) -> Dict:
    """Detect the transcript format and dispatch to the matching parser.

    Two normalized copies of ``text`` are prepared:

    * a flattened copy in which every whitespace run, line breaks included,
      becomes a single space. It is used for format detection and for the
      ``miami_dade`` parser, which has always received flattened text.
    * a line-preserving copy in which each line is whitespace-collapsed and
      blank lines are dropped. It is used for every other parser, because
      their patterns capture "the rest of the line" with ``[^\\n]+`` and
      would otherwise swallow the remainder of the document.

    Args:
        text: Raw transcript text.

    Returns:
        The dictionary produced by the selected format parser.

    Raises:
        gr.Error: If detection or parsing raises; the original exception is
            logged and chained as the cause.
    """
    try:
        # Fully flattened: all whitespace runs (including newlines) -> one space.
        flat_text = re.sub(r'\s+', ' ', text)

        # Line-preserving: collapse whitespace inside each line, drop empty lines.
        collapsed_lines = (' '.join(line.split()) for line in text.splitlines())
        line_text = '\n'.join(line for line in collapsed_lines if line)

        # Detect transcript format on the flattened text so multi-word
        # markers still match when they were wrapped across lines.
        format_type = self.detect_format(flat_text)

        # Unknown format names fall back to the standard parser.
        parser = self.supported_formats.get(format_type, self.parse_standard)

        if format_type == 'miami_dade':
            return parser(flat_text)
        return parser(line_text)

    except Exception as e:
        logging.error(f"Error parsing transcript: {str(e)}")
        raise gr.Error(f"Error parsing transcript: {str(e)}\n\nThis may be due to an unsupported transcript format. Please ensure you're uploading an official transcript or contact support.") from e
|
| 314 |
+
|
| 315 |
+
def detect_format(self, text: str) -> str:
    """Classify transcript text by the first marker phrase it contains.

    Args:
        text: Transcript text to inspect.

    Returns:
        ``'miami_dade'`` or ``'homeschool'`` when the corresponding
        case-insensitive pattern is found, otherwise ``'standard'``.
    """
    # Checked in order; the first pattern that matches decides the format.
    signatures = (
        ('miami_dade', r'MIAMI-DADE SCHOOL DISTRICT'),
        ('homeschool', r'homeschool|home education|parent signature'),
    )
    for label, pattern in signatures:
        if re.search(pattern, text, re.IGNORECASE):
            return label
    return 'standard'
|
| 325 |
+
|
| 326 |
+
def parse_miami_dade(self, text: str) -> Dict:
    """Run the Miami-Dade extraction pipeline and bundle the parser state.

    Args:
        text: Transcript text handed to each extraction step.

    Returns:
        A dictionary exposing the parser's ``student_data``,
        ``requirements``, ``current_courses``, ``course_history`` and
        ``graduation_status`` attributes, plus ``"format": "miami_dade"``.
    """
    # Text-driven steps, looked up and invoked one at a time in this order.
    extraction_steps = (
        "_extract_student_info",
        "_extract_requirements",
        "_extract_course_history",
        "_extract_current_courses",
    )
    for step_name in extraction_steps:
        getattr(self, step_name)(text)

    # Runs after the extraction steps and takes no text argument.
    self._calculate_completion()

    result = {}
    result["student_info"] = self.student_data
    result["requirements"] = self.requirements
    result["current_courses"] = self.current_courses
    result["course_history"] = self.course_history
    result["graduation_status"] = self.graduation_status
    result["format"] = "miami_dade"
    return result
|
| 342 |
+
|
| 343 |
+
def parse_standard(self, text: str) -> Dict:
    """Parse a generic transcript: student name, course rows and GPA pair.

    Updates ``self.student_data`` and appends to ``self.course_history``.

    Args:
        text: Transcript text. The student name is captured up to the end
            of its line, so the text should still contain line breaks.

    Returns:
        A dictionary with ``student_info``, ``course_history`` and
        ``"format": "standard"``.
    """
    # Student name: everything after "Student:" up to the end of that line.
    student_match = re.search(r"Student:\s*([^\n]+)", text, re.IGNORECASE)
    if student_match:
        self.student_data["name"] = student_match.group(1).strip()

    # Course rows: year, subject, code, title, letter grade, credit.
    # The title is matched lazily so that several rows sharing one line are
    # split at the first grade/credit pair instead of being merged into one
    # row that ends at the last pair.
    course_pattern = (
        r"(?P<year>\d{4}-\d{4}|\d{1,2})\s+(?P<subject>\w+)\s+(?P<code>\w+)\s+"
        r"(?P<title>[^\n]+?)\s+(?P<grade>[A-F][+-]?)\s+(?P<credit>\d\.\d)"
    )
    self.course_history.extend(
        {
            "school_year": match.group("year"),
            "subject": match.group("subject"),
            "course_code": match.group("code"),
            "description": match.group("title").strip(),
            "grade": match.group("grade"),
            "credits": match.group("credit"),
        }
        for match in re.finditer(course_pattern, text)
    )

    # GPA pair written as "GPA <first> / <second>". Each number is matched
    # as a well-formed decimal so trailing punctuation (e.g. "4.0.") is not
    # captured and cannot make float() raise ValueError.
    number = r"(\d+(?:\.\d*)?|\.\d+)"
    gpa_match = re.search(rf"GPA\s*{number}\s*/\s*{number}", text)
    if gpa_match:
        self.student_data.update({
            "unweighted_gpa": float(gpa_match.group(1)),
            "weighted_gpa": float(gpa_match.group(2)),
        })

    return {
        "student_info": self.student_data,
        "course_history": self.course_history,
        "format": "standard",
    }
|
| 378 |
+
|
| 379 |
+
def parse_homeschool(self, text: str) -> Dict:
    """Parse a homeschool transcript: student, parent, course rows and GPA.

    Updates ``self.student_data`` and appends to ``self.course_history``.

    Args:
        text: Transcript text. The student and parent names are captured up
            to the end of their line, so the text should still contain line
            breaks.

    Returns:
        A dictionary with ``student_info``, ``course_history`` and
        ``"format": "homeschool"``.
    """
    # "Student:" and "Parent:" values run to the end of their line.
    for field, label in (("name", "Student"), ("parent", "Parent")):
        field_match = re.search(rf"{label}:\s*([^\n]+)", text, re.IGNORECASE)
        if field_match:
            self.student_data[field] = field_match.group(1).strip()

    # Course rows: subject, title, completion date (word-YYYY), grade, credit.
    course_pattern = r"(?P<subject>\w+)\s+(?P<title>[^\n]+?)\s+(?P<date>\w+-\d{4})\s+(?P<grade>[A-F][+-]?)\s+(?P<credit>\d\.\d)"
    self.course_history.extend(
        {
            "subject": match.group("subject"),
            "description": match.group("title").strip(),
            "completion_date": match.group("date"),
            "grade": match.group("grade"),
            "credits": match.group("credit"),
        }
        for match in re.finditer(course_pattern, text)
    )

    # The GPA is matched as a well-formed decimal so trailing punctuation
    # (e.g. "3.8.") is not captured and cannot make float() raise ValueError.
    gpa_match = re.search(r"Cumulative GPA:\s*(\d+(?:\.\d*)?|\.\d+)", text, re.IGNORECASE)
    if gpa_match:
        self.student_data["gpa"] = float(gpa_match.group(1))

    return {
        "student_info": self.student_data,
        "course_history": self.course_history,
        "format": "homeschool",
    }
|
| 414 |
|
| 415 |
def _extract_student_info(self, text: str):
|
| 416 |
"""Enhanced student info extraction for Miami-Dade format"""
|
|
|
|
| 417 |
student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
|
| 418 |
student_match = re.search(student_pattern, text, re.IGNORECASE)
|
| 419 |
|
|
|
|
| 592 |
}, indent=2)
|
| 593 |
|
| 594 |
def format_transcript_output(data: Dict) -> str:
|
| 595 |
+
"""Enhanced formatting for transcript output with format awareness"""
|
| 596 |
output = []
|
| 597 |
|
| 598 |
# Student Info Section
|
|
|
|
| 602 |
output.append(f"**Student ID:** {student.get('id', 'Unknown')}")
|
| 603 |
output.append(f"**Current Grade:** {student.get('current_grade', 'Unknown')}")
|
| 604 |
output.append(f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
|
| 606 |
+
if 'unweighted_gpa' in student and 'weighted_gpa' in student:
|
| 607 |
+
output.append(f"**Unweighted GPA:** {student['unweighted_gpa']}")
|
| 608 |
+
output.append(f"**Weighted GPA:** {student['weighted_gpa']}")
|
| 609 |
+
elif 'gpa' in student:
|
| 610 |
+
output.append(f"**GPA:** {student['gpa']}")
|
| 611 |
+
|
| 612 |
+
if 'total_credits' in student:
|
| 613 |
+
output.append(f"**Total Credits Earned:** {student['total_credits']}")
|
| 614 |
+
if 'community_service_hours' in student:
|
| 615 |
+
output.append(f"**Community Service Hours:** {student['community_service_hours']}")
|
| 616 |
+
if 'parent' in student:
|
| 617 |
+
output.append(f"**Parent/Guardian:** {student['parent']}")
|
| 618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
output.append("")
|
| 620 |
|
| 621 |
+
# Graduation Requirements Section (for Miami-Dade format)
|
| 622 |
+
if data.get('format') == 'miami_dade':
|
| 623 |
+
grad_status = data.get("graduation_status", {})
|
| 624 |
+
output.append(f"## Graduation Progress\n{'='*50}")
|
| 625 |
+
output.append(f"**Overall Completion:** {grad_status.get('percent_complete', 0)}%")
|
| 626 |
+
output.append(f"**Credits Required:** {grad_status.get('total_required_credits', 0)}")
|
| 627 |
+
output.append(f"**Credits Completed:** {grad_status.get('total_completed_credits', 0)}")
|
| 628 |
+
output.append(f"**Credits Remaining:** {grad_status.get('remaining_credits', 0)}")
|
| 629 |
+
output.append(f"**On Track to Graduate:** {'Yes' if grad_status.get('on_track', False) else 'No'}\n")
|
| 630 |
+
|
| 631 |
+
# Detailed Requirements
|
| 632 |
+
output.append("### Detailed Requirements:")
|
| 633 |
+
for code, req in data.get("requirements", {}).items():
|
| 634 |
+
output.append(
|
| 635 |
+
f"- **{code}**: {req.get('description', '')}\n"
|
| 636 |
+
f" Required: {req['required']} | Completed: {req['completed']} | "
|
| 637 |
+
f"Status: {req['status']}"
|
| 638 |
+
)
|
| 639 |
+
output.append("")
|
| 640 |
+
|
| 641 |
# Current Courses
|
| 642 |
if data.get("current_courses"):
|
| 643 |
output.append("## Current Courses (In Progress)\n" + '='*50)
|
|
|
|
| 653 |
# Course History by Year
|
| 654 |
courses_by_year = defaultdict(list)
|
| 655 |
for course in data.get("course_history", []):
|
| 656 |
+
year_key = course.get("school_year", course.get("completion_date", "Unknown"))
|
| 657 |
+
courses_by_year[year_key].append(course)
|
| 658 |
|
| 659 |
if courses_by_year:
|
| 660 |
output.append("## Course History\n" + '='*50)
|
|
|
|
| 662 |
output.append(f"\n### {year}")
|
| 663 |
for course in courses_by_year[year]:
|
| 664 |
output.append(
|
| 665 |
+
f"- **{course.get('course_code', '')} {course.get('description', 'Unnamed course')}**\n"
|
| 666 |
+
f" Subject: {course.get('subject', 'N/A')} | "
|
| 667 |
+
f"Grade: {course.get('grade', 'N/A')} | "
|
| 668 |
+
f"Credits: {course.get('credits', 'N/A')}"
|
| 669 |
)
|
| 670 |
|
| 671 |
return '\n'.join(output)
|
|
|
|
| 721 |
if progress:
|
| 722 |
progress(0.1, desc="Processing transcript with AI...")
|
| 723 |
|
| 724 |
+
model, tokenizer = get_model_and_tokenizer()
|
| 725 |
if model is None or tokenizer is None:
|
| 726 |
raise gr.Error(f"Model failed to load. {model_loader.error or 'Please try loading a model first.'}")
|
| 727 |
|
|
|
|
| 761 |
logging.error(f"AI parsing error: {str(e)}")
|
| 762 |
raise gr.Error(f"Error processing transcript: {str(e)}\n\nPlease try again or contact support with this error message.")
|
| 763 |
|
| 764 |
+
async def parse_transcript_async(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
    """Run the blocking ``parse_transcript`` on the shared executor and await it.

    Args:
        file_obj: Passed through unchanged to ``parse_transcript``.
        progress: Passed through unchanged to ``parse_transcript``.

    Returns:
        Whatever ``parse_transcript(file_obj, progress)`` returns.
    """
    # Inside a coroutine a loop is always running; get_running_loop() returns
    # it directly and never creates a new loop as a side effect.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, parse_transcript, file_obj, progress)
|
| 768 |
+
|
| 769 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
| 770 |
"""Main function to parse transcript files with better error handling"""
|
| 771 |
try:
|
|
|
|
| 1006 |
def get_profile_path(self, name: str) -> Path:
    """Return the JSON file path used to store the profile for ``name``.

    The file stem always starts with the first 16 hex characters of the
    SHA-256 of ``name``. The raw name never reaches the filesystem, so it
    cannot inject path separators. This also matches the hashed stems that
    the profile lookup code elsewhere in this file builds, both with and
    without a session token.

    Args:
        name: Profile owner's name.

    Returns:
        ``<profiles_dir>/<hash>_<session>_profile.json`` when
        ``self.current_session`` is set, otherwise
        ``<profiles_dir>/<hash>_profile.json``.
    """
    name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
    if self.current_session:
        return self.profiles_dir / f"{name_hash}_{self.current_session}_profile.json"
    return self.profiles_dir / f"{name_hash}_profile.json"
|
| 1013 |
|
| 1014 |
def save_profile(self, name: str, age: Union[int, str], interests: str,
|
|
|
|
| 1044 |
"learning_style": learning_style if learning_style else "Not assessed",
|
| 1045 |
"favorites": favorites,
|
| 1046 |
"blog": sanitize_input(blog) if blog else "",
|
| 1047 |
+
"session_token": self.current_session,
|
| 1048 |
+
"last_updated": time.time()
|
| 1049 |
}
|
| 1050 |
|
| 1051 |
# Save to JSON file
|
|
|
|
| 1085 |
return {}
|
| 1086 |
|
| 1087 |
if name:
|
| 1088 |
+
# Find profile by name (hashed)
|
| 1089 |
+
name_hash = hashlib.sha256(name.encode()).hexdigest()[:16]
|
| 1090 |
if session_token:
|
| 1091 |
+
profile_file = self.profiles_dir / f"{name_hash}_{session_token}_profile.json"
|
| 1092 |
else:
|
| 1093 |
+
profile_file = self.profiles_dir / f"{name_hash}_profile.json"
|
| 1094 |
|
| 1095 |
if not profile_file.exists():
|
| 1096 |
# Try loading from HF Hub
|
|
|
|
| 1111 |
profile_file = profiles[0]
|
| 1112 |
|
| 1113 |
with open(profile_file, "r", encoding='utf-8') as f:
|
| 1114 |
+
profile_data = json.load(f)
|
| 1115 |
+
# Check session timeout
|
| 1116 |
+
if time.time() - profile_data.get('last_updated', 0) > SESSION_TIMEOUT:
|
| 1117 |
+
raise gr.Error("Session expired. Please start a new session.")
|
| 1118 |
+
return profile_data
|
| 1119 |
|
| 1120 |
except Exception as e:
|
| 1121 |
logging.error(f"Error loading profile: {str(e)}")
|
|
|
|
| 1131 |
# Extract just the name part (without session token)
|
| 1132 |
profile_names = []
|
| 1133 |
for p in profiles:
|
| 1134 |
+
with open(p, "r", encoding='utf-8') as f:
|
| 1135 |
+
try:
|
| 1136 |
+
data = json.load(f)
|
| 1137 |
+
profile_names.append(data.get('name', p.stem))
|
| 1138 |
+
except json.JSONDecodeError:
|
| 1139 |
+
continue
|
| 1140 |
|
| 1141 |
return profile_names
|
| 1142 |
|
|
|
|
| 1203 |
self.context_history = []
|
| 1204 |
self.max_context_length = 5 # Keep last 5 exchanges for context
|
| 1205 |
|
| 1206 |
+
async def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
|
| 1207 |
"""Generate personalized response based on student profile and context."""
|
| 1208 |
try:
|
| 1209 |
# Load profile with session token
|
|
|
|
| 1224 |
favorites = profile.get("favorites", {})
|
| 1225 |
|
| 1226 |
# Process message with context
|
| 1227 |
+
response = await self._process_message(message, profile)
|
| 1228 |
|
| 1229 |
# Add follow-up suggestions
|
| 1230 |
if "study" in message.lower() or "learn" in message.lower():
|
|
|
|
| 1251 |
# Trim to maintain max context length
|
| 1252 |
self.context_history = self.context_history[-(self.max_context_length*2):]
|
| 1253 |
|
| 1254 |
+
async def _process_message(self, message: str, profile: Dict) -> str:
|
| 1255 |
"""Process user message with profile context."""
|
| 1256 |
message_lower = message.lower()
|
| 1257 |
|
|
|
|
| 1455 |
4: False # AI Assistant
|
| 1456 |
})
|
| 1457 |
|
| 1458 |
+
# Custom CSS with dark mode support
|
| 1459 |
app.css = """
|
| 1460 |
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
|
| 1461 |
.tab-content { padding: 20px !important; border: 1px solid #e0e0e0 !important; border-radius: 8px !important; margin-top: 10px !important; }
|
|
|
|
| 1467 |
.quiz-question { margin-bottom: 15px; padding: 15px; background: #f5f5f5; border-radius: 5px; }
|
| 1468 |
.quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
|
| 1469 |
.error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
|
| 1470 |
+
|
| 1471 |
+
/* Dark mode support */
|
| 1472 |
+
.dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
|
| 1473 |
+
.dark .quiz-question { background-color: #3d3d3d !important; }
|
| 1474 |
+
.dark .quiz-results { background-color: #2e3d2e !important; }
|
| 1475 |
+
.dark textarea, .dark input { background-color: #333 !important; color: #eee !important; }
|
| 1476 |
+
.dark .output-markdown { color: #eee !important; }
|
| 1477 |
+
.dark .chatbot { background-color: #333 !important; }
|
| 1478 |
+
.dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
|
| 1479 |
"""
|
| 1480 |
|
| 1481 |
+
# Header with dark mode toggle
|
| 1482 |
+
with gr.Row():
|
| 1483 |
+
with gr.Column(scale=4):
|
| 1484 |
+
gr.Markdown("""
|
| 1485 |
+
# Student Learning Assistant
|
| 1486 |
+
**Your personalized education companion**
|
| 1487 |
+
Complete each step to get customized learning recommendations.
|
| 1488 |
+
""")
|
| 1489 |
+
with gr.Column(scale=1):
|
| 1490 |
+
dark_mode = gr.Checkbox(label="Dark Mode", value=False)
|
| 1491 |
+
|
| 1492 |
# Navigation buttons
|
| 1493 |
with gr.Row():
|
| 1494 |
with gr.Column(scale=1, min_width=100):
|
|
|
|
| 1886 |
outputs=[tabs, nav_message]
|
| 1887 |
)
|
| 1888 |
|
| 1889 |
+
# Dark mode toggle
|
| 1890 |
+
def toggle_dark_mode(dark):
|
| 1891 |
+
return gr.themes.Soft(primary_hue="blue", secondary_hue="gray") if not dark else gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate")
|
| 1892 |
+
|
| 1893 |
+
dark_mode.change(
|
| 1894 |
+
fn=toggle_dark_mode,
|
| 1895 |
+
inputs=dark_mode,
|
| 1896 |
+
outputs=None
|
| 1897 |
+
)
|
| 1898 |
+
|
| 1899 |
# Load model on startup
|
| 1900 |
app.load(fn=lambda: model_loader.load_model(), outputs=[])
|
| 1901 |
|