Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,15 @@ import sys
|
|
| 3 |
import subprocess
|
| 4 |
import importlib
|
| 5 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
required_packages = {
|
| 8 |
'gradio': 'gradio>=3.0',
|
|
@@ -26,18 +35,6 @@ def check_and_install_packages():
|
|
| 26 |
|
| 27 |
check_and_install_packages()
|
| 28 |
|
| 29 |
-
# ========== MAIN IMPORTS ==========
|
| 30 |
-
import gradio as gr
|
| 31 |
-
import pandas as pd
|
| 32 |
-
import json
|
| 33 |
-
import os
|
| 34 |
-
import re
|
| 35 |
-
from PyPDF2 import PdfReader
|
| 36 |
-
from collections import defaultdict
|
| 37 |
-
from transformers import pipeline
|
| 38 |
-
from typing import List, Dict, Union
|
| 39 |
-
import pdfplumber
|
| 40 |
-
|
| 41 |
# ========== TRANSCRIPT PARSING ==========
|
| 42 |
class UniversalTranscriptParser:
|
| 43 |
def __init__(self):
|
|
@@ -53,7 +50,6 @@ class UniversalTranscriptParser:
|
|
| 53 |
}
|
| 54 |
|
| 55 |
def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 56 |
-
"""Determine transcript type and parse accordingly"""
|
| 57 |
transcript_type = self._identify_transcript_type(text)
|
| 58 |
|
| 59 |
if transcript_type == 'homeschool':
|
|
@@ -64,7 +60,6 @@ class UniversalTranscriptParser:
|
|
| 64 |
return self._parse_miami_dade(text)
|
| 65 |
|
| 66 |
def _identify_transcript_type(self, text: str) -> str:
|
| 67 |
-
"""Identify which type of transcript we're processing"""
|
| 68 |
if re.search(r'Sample OFFICIAL HIGH SCHOOL TRANSCRIPT', text):
|
| 69 |
return 'homeschool'
|
| 70 |
elif re.search(r'DORAL ACADEMY HIGH SCHOOL', text):
|
|
@@ -72,27 +67,22 @@ class UniversalTranscriptParser:
|
|
| 72 |
return 'miami_dade'
|
| 73 |
|
| 74 |
def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 75 |
-
"""Parse homeschool transcript format"""
|
| 76 |
courses = []
|
| 77 |
current_grade = None
|
| 78 |
current_year = None
|
| 79 |
|
| 80 |
-
# Extract student info
|
| 81 |
student_info = {}
|
| 82 |
name_match = re.search(r'Student Name:\s*(.+)\s*SSN:', text)
|
| 83 |
if name_match:
|
| 84 |
student_info['name'] = name_match.group(1).strip()
|
| 85 |
|
| 86 |
-
# Process each line
|
| 87 |
for line in text.split('\n'):
|
| 88 |
-
# Check for grade level header
|
| 89 |
grade_match = re.match(r'^\|?\s*(\d+th Grade)\s*\|.*(\d{4}-\d{4})', line)
|
| 90 |
if grade_match:
|
| 91 |
current_grade = grade_match.group(1)
|
| 92 |
current_year = grade_match.group(2)
|
| 93 |
continue
|
| 94 |
|
| 95 |
-
# Course line pattern
|
| 96 |
course_match = re.match(
|
| 97 |
r'^\|?\s*([^\|]+?)\s*\|\s*([A-Z][+*]?)\s*\|\s*([^\|]+)\s*\|\s*(\d+\.?\d*)\s*\|\s*(\d+)',
|
| 98 |
line
|
|
@@ -100,7 +90,6 @@ class UniversalTranscriptParser:
|
|
| 100 |
|
| 101 |
if course_match and current_grade:
|
| 102 |
course_name = course_match.group(1).strip()
|
| 103 |
-
# Clean course names that start with | or have extra spaces
|
| 104 |
course_name = re.sub(r'^\|?\s*', '', course_name)
|
| 105 |
|
| 106 |
courses.append({
|
|
@@ -114,14 +103,7 @@ class UniversalTranscriptParser:
|
|
| 114 |
'transcript_type': 'homeschool'
|
| 115 |
})
|
| 116 |
|
| 117 |
-
|
| 118 |
-
gpa_data = {}
|
| 119 |
-
gpa_match = re.search(r'Cum\. GPA\s*\|\s*([\d\.]+)', text)
|
| 120 |
-
if gpa_match:
|
| 121 |
-
gpa_value = gpa_match.group(1)
|
| 122 |
-
gpa_data['unweighted'] = gpa_value
|
| 123 |
-
gpa_data['weighted'] = gpa_value # Homeschool often has same weighted/unweighted
|
| 124 |
-
|
| 125 |
return {
|
| 126 |
'student_info': student_info,
|
| 127 |
'courses': {'All': courses},
|
|
@@ -130,25 +112,19 @@ class UniversalTranscriptParser:
|
|
| 130 |
}
|
| 131 |
|
| 132 |
def _parse_doral_academy(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 133 |
-
"""Parse Doral Academy specific format"""
|
| 134 |
courses = []
|
| 135 |
-
|
| 136 |
-
# Extract student info
|
| 137 |
student_info = {}
|
| 138 |
name_match = re.search(r'LEGAL NAME:\s*([^\n]+)', text)
|
| 139 |
if name_match:
|
| 140 |
student_info['name'] = name_match.group(1).strip()
|
| 141 |
|
| 142 |
-
# Extract school year information
|
| 143 |
year_pattern = re.compile(r'YEAR:\s*(\d{4}-\d{4})\s*GRADE LEVEL:\s*(\d{2})', re.MULTILINE)
|
| 144 |
year_matches = year_pattern.finditer(text)
|
| 145 |
|
| 146 |
-
# Create mapping of grade levels to years
|
| 147 |
grade_year_map = {}
|
| 148 |
for match in year_matches:
|
| 149 |
grade_year_map[match.group(2)] = match.group(1)
|
| 150 |
|
| 151 |
-
# Course pattern for Doral Academy
|
| 152 |
course_pattern = re.compile(
|
| 153 |
r'(\d)\s+(\d{7})\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+(\d\.\d{2})\s+(\d\.\d{2})',
|
| 154 |
re.MULTILINE
|
|
@@ -176,17 +152,7 @@ class UniversalTranscriptParser:
|
|
| 176 |
|
| 177 |
courses_by_grade[grade_level_num].append(course_info)
|
| 178 |
|
| 179 |
-
|
| 180 |
-
gpa_data = {}
|
| 181 |
-
unweighted_match = re.search(r'Un-weighted GPA\s*([\d\.]+)', text)
|
| 182 |
-
weighted_match = re.search(r'Weighted GPA\s*([\d\.]+)', text)
|
| 183 |
-
|
| 184 |
-
if unweighted_match:
|
| 185 |
-
gpa_data['unweighted'] = unweighted_match.group(1)
|
| 186 |
-
if weighted_match:
|
| 187 |
-
gpa_data['weighted'] = weighted_match.group(1)
|
| 188 |
-
|
| 189 |
-
# Extract current grade level
|
| 190 |
grade_level = "12" if re.search(r'GRADE LEVEL:\s*12', text) else "Unknown"
|
| 191 |
|
| 192 |
return {
|
|
@@ -197,17 +163,14 @@ class UniversalTranscriptParser:
|
|
| 197 |
}
|
| 198 |
|
| 199 |
def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
| 200 |
-
"""Parse standard Miami-Dade format"""
|
| 201 |
courses = []
|
| 202 |
courses_by_grade = defaultdict(list)
|
| 203 |
|
| 204 |
-
# Extract student info
|
| 205 |
student_info = {}
|
| 206 |
name_match = re.search(r'0783977 - ([^,]+),\s*([^\n]+)', text)
|
| 207 |
if name_match:
|
| 208 |
student_info['name'] = f"{name_match.group(2)} {name_match.group(1)}"
|
| 209 |
|
| 210 |
-
# Course pattern for Miami-Dade
|
| 211 |
course_pattern = re.compile(
|
| 212 |
r'([A-Z]-[A-Za-z\s&]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([A-Z]?)\s*\|\s*([A-Z]?)\s*\|\s*([^\|]+)',
|
| 213 |
re.MULTILINE
|
|
@@ -233,13 +196,7 @@ class UniversalTranscriptParser:
|
|
| 233 |
|
| 234 |
courses_by_grade[match.group(3)].append(course_info)
|
| 235 |
|
| 236 |
-
|
| 237 |
-
gpa_data = {
|
| 238 |
-
'weighted': self._extract_gpa(text, 'Weighted GPA'),
|
| 239 |
-
'unweighted': self._extract_gpa(text, 'Un-weighted GPA')
|
| 240 |
-
}
|
| 241 |
-
|
| 242 |
-
# Extract current grade level
|
| 243 |
grade_level = re.search(r'Current Grade:\s*(\d+)', text).group(1) if re.search(r'Current Grade:\s*(\d+)', text) else "Unknown"
|
| 244 |
|
| 245 |
return {
|
|
@@ -249,38 +206,50 @@ class UniversalTranscriptParser:
|
|
| 249 |
'grade_level': grade_level
|
| 250 |
}
|
| 251 |
|
| 252 |
-
def
|
| 253 |
-
"""
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
'
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
)
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
|
|
|
| 284 |
def parse_transcript(file):
|
| 285 |
parser = UniversalTranscriptParser()
|
| 286 |
|
|
@@ -292,15 +261,19 @@ def parse_transcript(file):
|
|
| 292 |
|
| 293 |
parsed_data = parser.parse_transcript(text)
|
| 294 |
|
| 295 |
-
#
|
| 296 |
gpa_data = parsed_data.get('gpa', {})
|
| 297 |
-
weighted_gpa = gpa_data.get('weighted', 'Not
|
| 298 |
-
unweighted_gpa = gpa_data.get('unweighted', 'Not
|
| 299 |
|
| 300 |
-
output_text = "
|
| 301 |
-
output_text += "GPA
|
| 302 |
-
output_text += f"
|
| 303 |
-
output_text += f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
return output_text, parsed_data
|
| 306 |
else:
|
|
@@ -309,48 +282,12 @@ def parse_transcript(file):
|
|
| 309 |
# ========== LEARNING STYLE QUIZ ==========
|
| 310 |
learning_style_questions = [
|
| 311 |
"When you study for a test, you prefer to:",
|
| 312 |
-
|
| 313 |
-
"When you learn a new skill, you prefer to:",
|
| 314 |
-
"When you're trying to concentrate, you:",
|
| 315 |
-
"When you meet new people, you remember them by:",
|
| 316 |
-
"When you're assembling furniture or a gadget, you:",
|
| 317 |
-
"When choosing a restaurant, you rely most on:",
|
| 318 |
-
"When you're in a waiting room, you typically:",
|
| 319 |
-
"When giving someone instructions, you tend to:",
|
| 320 |
-
"When you're trying to recall information, you:",
|
| 321 |
-
"When you're at a museum or exhibit, you:",
|
| 322 |
-
"When you're learning a new language, you prefer:",
|
| 323 |
-
"When you're taking notes in class, you:",
|
| 324 |
-
"When you're explaining something complex, you:",
|
| 325 |
-
"When you're at a party, you enjoy:",
|
| 326 |
-
"When you're trying to remember a phone number, you:",
|
| 327 |
-
"When you're relaxing, you prefer to:",
|
| 328 |
-
"When you're learning to use new software, you:",
|
| 329 |
-
"When you're giving a presentation, you rely on:",
|
| 330 |
-
"When you're solving a difficult problem, you:"
|
| 331 |
]
|
| 332 |
|
| 333 |
learning_style_options = [
|
| 334 |
["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
|
| 335 |
-
|
| 336 |
-
["Read instructions (Reading/Writing)", "Have someone show you (Visual)", "Listen to explanations (Auditory)", "Try it yourself (Kinesthetic)"],
|
| 337 |
-
["Need quiet (Reading/Writing)", "Need background noise (Auditory)", "Need to move around (Kinesthetic)", "Need visual stimulation (Visual)"],
|
| 338 |
-
["Their face (Visual)", "Their name (Auditory)", "What you talked about (Reading/Writing)", "What you did together (Kinesthetic)"],
|
| 339 |
-
["Read the instructions carefully (Reading/Writing)", "Look at the diagrams (Visual)", "Ask someone to explain (Auditory)", "Start putting pieces together (Kinesthetic)"],
|
| 340 |
-
["Online photos of the food (Visual)", "Recommendations from friends (Auditory)", "Reading the menu online (Reading/Writing)", "Remembering how it felt to eat there (Kinesthetic)"],
|
| 341 |
-
["Read magazines (Reading/Writing)", "Listen to music (Auditory)", "Watch TV (Visual)", "Fidget or move around (Kinesthetic)"],
|
| 342 |
-
["Write them down (Reading/Writing)", "Explain verbally (Auditory)", "Demonstrate (Visual)", "Guide them physically (Kinesthetic)"],
|
| 343 |
-
["See written words in your mind (Visual)", "Hear the information in your head (Auditory)", "Write it down to remember (Reading/Writing)", "Associate it with physical actions (Kinesthetic)"],
|
| 344 |
-
["Read all the descriptions (Reading/Writing)", "Listen to audio guides (Auditory)", "Look at the displays (Visual)", "Touch interactive exhibits (Kinesthetic)"],
|
| 345 |
-
["Study grammar rules (Reading/Writing)", "Listen to native speakers (Auditory)", "Use flashcards with images (Visual)", "Practice conversations (Kinesthetic)"],
|
| 346 |
-
["Write detailed paragraphs (Reading/Writing)", "Record the lecture (Auditory)", "Draw diagrams and charts (Visual)", "Doodle while listening (Kinesthetic)"],
|
| 347 |
-
["Write detailed steps (Reading/Writing)", "Explain verbally with examples (Auditory)", "Draw diagrams (Visual)", "Use physical objects to demonstrate (Kinesthetic)"],
|
| 348 |
-
["Conversations with people (Auditory)", "Watching others or the environment (Visual)", "Writing notes or texting (Reading/Writing)", "Dancing or physical activities (Kinesthetic)"],
|
| 349 |
-
["See the numbers in your mind (Visual)", "Say them aloud (Auditory)", "Write them down (Reading/Writing)", "Dial them on a keypad (Kinesthetic)"],
|
| 350 |
-
["Read a book (Reading/Writing)", "Listen to music (Auditory)", "Watch TV/movies (Visual)", "Do something physical (Kinesthetic)"],
|
| 351 |
-
["Read the manual (Reading/Writing)", "Ask someone to show you (Visual)", "Call tech support (Auditory)", "Experiment with the software (Kinesthetic)"],
|
| 352 |
-
["Detailed notes (Reading/Writing)", "Verbal explanations (Auditory)", "Visual slides (Visual)", "Physical demonstrations (Kinesthetic)"],
|
| 353 |
-
["Write out possible solutions (Reading/Writing)", "Talk through it with someone (Auditory)", "Draw diagrams (Visual)", "Build a model or prototype (Kinesthetic)"]
|
| 354 |
]
|
| 355 |
|
| 356 |
def learning_style_quiz(*answers):
|
|
@@ -374,48 +311,19 @@ def learning_style_quiz(*answers):
|
|
| 374 |
max_score = max(scores.values())
|
| 375 |
total_questions = len(learning_style_questions)
|
| 376 |
|
| 377 |
-
# Calculate percentages
|
| 378 |
percentages = {style: (score/total_questions)*100 for style, score in scores.items()}
|
| 379 |
-
|
| 380 |
-
# Sort styles by score (descending)
|
| 381 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
| 382 |
|
| 383 |
-
# Prepare detailed results
|
| 384 |
result = "Your Learning Style Results:\n\n"
|
| 385 |
for style, score in sorted_styles:
|
| 386 |
result += f"{style}: {score}/{total_questions} ({percentages[style]:.1f}%)\n"
|
| 387 |
|
| 388 |
result += "\n"
|
| 389 |
-
|
| 390 |
-
# Determine primary and secondary styles
|
| 391 |
primary_styles = [style for style, score in scores.items() if score == max_score]
|
| 392 |
|
| 393 |
if len(primary_styles) == 1:
|
| 394 |
result += f"Your primary learning style is: {primary_styles[0]}\n\n"
|
| 395 |
-
|
| 396 |
-
result += "Tips for Visual Learners:\n"
|
| 397 |
-
result += "- Use color coding in your notes\n"
|
| 398 |
-
result += "- Create mind maps and diagrams\n"
|
| 399 |
-
result += "- Watch educational videos\n"
|
| 400 |
-
result += "- Use flashcards with images\n"
|
| 401 |
-
elif primary_styles[0] == "Auditory":
|
| 402 |
-
result += "Tips for Auditory Learners:\n"
|
| 403 |
-
result += "- Record lectures and listen to them\n"
|
| 404 |
-
result += "- Participate in study groups\n"
|
| 405 |
-
result += "- Explain concepts out loud to yourself\n"
|
| 406 |
-
result += "- Use rhymes or songs to remember information\n"
|
| 407 |
-
elif primary_styles[0] == "Reading/Writing":
|
| 408 |
-
result += "Tips for Reading/Writing Learners:\n"
|
| 409 |
-
result += "- Write detailed notes\n"
|
| 410 |
-
result += "- Create summaries in your own words\n"
|
| 411 |
-
result += "- Read textbooks and articles\n"
|
| 412 |
-
result += "- Make lists to organize information\n"
|
| 413 |
-
else: # Kinesthetic
|
| 414 |
-
result += "Tips for Kinesthetic Learners:\n"
|
| 415 |
-
result += "- Use hands-on activities\n"
|
| 416 |
-
result += "- Take frequent movement breaks\n"
|
| 417 |
-
result += "- Create physical models\n"
|
| 418 |
-
result += "- Associate information with physical actions\n"
|
| 419 |
else:
|
| 420 |
result += f"You have multiple strong learning styles: {', '.join(primary_styles)}\n\n"
|
| 421 |
result += "You may benefit from combining different learning approaches.\n"
|
|
@@ -426,7 +334,6 @@ def learning_style_quiz(*answers):
|
|
| 426 |
def save_profile(name, age, interests, transcript, learning_style,
|
| 427 |
movie, movie_reason, show, show_reason,
|
| 428 |
book, book_reason, character, character_reason, blog):
|
| 429 |
-
# Convert age to int if it's a numpy number (from gradio Number input)
|
| 430 |
age = int(age) if age else 0
|
| 431 |
|
| 432 |
favorites = {
|
|
@@ -455,7 +362,6 @@ def save_profile(name, age, interests, transcript, learning_style,
|
|
| 455 |
with open(json_path, "w") as f:
|
| 456 |
json.dump(data, f, indent=2)
|
| 457 |
|
| 458 |
-
# Create profile summary with clear GPA display
|
| 459 |
gpa = transcript.get('gpa', {})
|
| 460 |
markdown_summary = f"""### Student Profile: {name}
|
| 461 |
**Age:** {age}
|
|
@@ -492,30 +398,11 @@ def generate_response(message, history):
|
|
| 492 |
if not profile:
|
| 493 |
return "Please complete and save your profile first using the previous tabs."
|
| 494 |
|
| 495 |
-
# Get profile data
|
| 496 |
-
learning_style = profile.get("learning_style", "")
|
| 497 |
transcript = profile.get("transcript", {})
|
| 498 |
gpa = transcript.get("gpa", {})
|
| 499 |
-
courses = []
|
| 500 |
-
|
| 501 |
-
# Flatten all courses from all grades
|
| 502 |
-
if 'courses' in transcript:
|
| 503 |
-
if isinstance(transcript['courses'], dict):
|
| 504 |
-
for grade_courses in transcript['courses'].values():
|
| 505 |
-
courses.extend(grade_courses)
|
| 506 |
-
elif isinstance(transcript['courses'], list):
|
| 507 |
-
courses = transcript['courses']
|
| 508 |
-
|
| 509 |
-
# Common responses
|
| 510 |
-
greetings = ["hi", "hello", "hey"]
|
| 511 |
-
study_help = ["study", "learn", "prepare", "exam"]
|
| 512 |
-
grade_help = ["gpa", "grade", "weighted", "unweighted", "grades"]
|
| 513 |
-
course_help = ["courses", "classes", "subjects"]
|
| 514 |
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
elif any(word in message.lower() for word in grade_help):
|
| 519 |
response = "Your GPA Information:\n"
|
| 520 |
response += f"- Weighted GPA: {gpa.get('weighted', 'Not Available')}\n"
|
| 521 |
response += f"- Unweighted GPA: {gpa.get('unweighted', 'Not Available')}\n"
|
|
@@ -533,61 +420,7 @@ def generate_response(message, history):
|
|
| 533 |
|
| 534 |
return response
|
| 535 |
|
| 536 |
-
|
| 537 |
-
# Analyze course performance to give personalized advice
|
| 538 |
-
strong_subjects = [c['name'] for c in courses if 'grade' in c and c['grade'] in ['A', 'A+', 'B+']]
|
| 539 |
-
weak_subjects = [c['name'] for c in courses if 'grade' in c and c['grade'] in ['D', 'F']]
|
| 540 |
-
|
| 541 |
-
response = "Here are some personalized study tips:\n"
|
| 542 |
-
|
| 543 |
-
if strong_subjects:
|
| 544 |
-
response += f"\nYou're doing well in: {', '.join(strong_subjects[:3])}\n"
|
| 545 |
-
response += "→ Keep up the good work in these areas!\n"
|
| 546 |
-
|
| 547 |
-
if weak_subjects:
|
| 548 |
-
response += f"\nYou might want to focus more on: {', '.join(weak_subjects[:3])}\n"
|
| 549 |
-
response += "→ Consider getting extra help or tutoring\n"
|
| 550 |
-
|
| 551 |
-
# Add learning style specific tips
|
| 552 |
-
if "Visual" in learning_style:
|
| 553 |
-
response += "\nVisual Learner Tip: Try creating diagrams or mind maps\n"
|
| 554 |
-
elif "Auditory" in learning_style:
|
| 555 |
-
response += "\nAuditory Learner Tip: Record yourself explaining concepts\n"
|
| 556 |
-
elif "Reading/Writing" in learning_style:
|
| 557 |
-
response += "\nReading/Writing Tip: Write summaries in your own words\n"
|
| 558 |
-
elif "Kinesthetic" in learning_style:
|
| 559 |
-
response += "\nKinesthetic Tip: Use physical objects to demonstrate concepts\n"
|
| 560 |
-
|
| 561 |
-
return response
|
| 562 |
-
|
| 563 |
-
elif any(word in message.lower() for word in course_help):
|
| 564 |
-
if not courses:
|
| 565 |
-
return "No course information available."
|
| 566 |
-
|
| 567 |
-
# Group by subject area
|
| 568 |
-
subjects = defaultdict(list)
|
| 569 |
-
for course in courses:
|
| 570 |
-
if 'name' in course:
|
| 571 |
-
# Extract first word as subject area
|
| 572 |
-
subject = course['name'].split()[0]
|
| 573 |
-
subjects[subject].append(course)
|
| 574 |
-
|
| 575 |
-
response = "Your course subjects:\n"
|
| 576 |
-
for subject, subject_courses in subjects.items():
|
| 577 |
-
response += f"\n{subject} ({len(subject_courses)} courses)"
|
| 578 |
-
|
| 579 |
-
return response
|
| 580 |
-
|
| 581 |
-
elif "help" in message.lower():
|
| 582 |
-
return ("I can help with:\n"
|
| 583 |
-
"- Your GPA information\n"
|
| 584 |
-
"- Personalized study tips\n"
|
| 585 |
-
"- Course information\n"
|
| 586 |
-
"- Learning style recommendations")
|
| 587 |
-
|
| 588 |
-
else:
|
| 589 |
-
return ("I'm your personalized teaching assistant. "
|
| 590 |
-
"Ask me about your GPA, courses, or study tips!")
|
| 591 |
|
| 592 |
# ========== GRADIO INTERFACE ==========
|
| 593 |
with gr.Blocks() as app:
|
|
|
|
| 3 |
import subprocess
|
| 4 |
import importlib
|
| 5 |
from datetime import datetime
|
| 6 |
+
import re
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import pdfplumber
|
| 10 |
+
from collections import defaultdict
|
| 11 |
+
from typing import List, Dict, Union
|
| 12 |
+
import gradio as gr
|
| 13 |
+
from PyPDF2 import PdfReader
|
| 14 |
+
from transformers import pipeline
|
| 15 |
|
| 16 |
required_packages = {
|
| 17 |
'gradio': 'gradio>=3.0',
|
|
|
|
| 35 |
|
| 36 |
check_and_install_packages()
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# ========== TRANSCRIPT PARSING ==========
|
| 39 |
class UniversalTranscriptParser:
|
| 40 |
def __init__(self):
|
|
|
|
| 50 |
}
|
| 51 |
|
| 52 |
def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
|
| 53 |
transcript_type = self._identify_transcript_type(text)
|
| 54 |
|
| 55 |
if transcript_type == 'homeschool':
|
|
|
|
| 60 |
return self._parse_miami_dade(text)
|
| 61 |
|
| 62 |
def _identify_transcript_type(self, text: str) -> str:
|
|
|
|
| 63 |
if re.search(r'Sample OFFICIAL HIGH SCHOOL TRANSCRIPT', text):
|
| 64 |
return 'homeschool'
|
| 65 |
elif re.search(r'DORAL ACADEMY HIGH SCHOOL', text):
|
|
|
|
| 67 |
return 'miami_dade'
|
| 68 |
|
| 69 |
def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
|
| 70 |
courses = []
|
| 71 |
current_grade = None
|
| 72 |
current_year = None
|
| 73 |
|
|
|
|
| 74 |
student_info = {}
|
| 75 |
name_match = re.search(r'Student Name:\s*(.+)\s*SSN:', text)
|
| 76 |
if name_match:
|
| 77 |
student_info['name'] = name_match.group(1).strip()
|
| 78 |
|
|
|
|
| 79 |
for line in text.split('\n'):
|
|
|
|
| 80 |
grade_match = re.match(r'^\|?\s*(\d+th Grade)\s*\|.*(\d{4}-\d{4})', line)
|
| 81 |
if grade_match:
|
| 82 |
current_grade = grade_match.group(1)
|
| 83 |
current_year = grade_match.group(2)
|
| 84 |
continue
|
| 85 |
|
|
|
|
| 86 |
course_match = re.match(
|
| 87 |
r'^\|?\s*([^\|]+?)\s*\|\s*([A-Z][+*]?)\s*\|\s*([^\|]+)\s*\|\s*(\d+\.?\d*)\s*\|\s*(\d+)',
|
| 88 |
line
|
|
|
|
| 90 |
|
| 91 |
if course_match and current_grade:
|
| 92 |
course_name = course_match.group(1).strip()
|
|
|
|
| 93 |
course_name = re.sub(r'^\|?\s*', '', course_name)
|
| 94 |
|
| 95 |
courses.append({
|
|
|
|
| 103 |
'transcript_type': 'homeschool'
|
| 104 |
})
|
| 105 |
|
| 106 |
+
gpa_data = self._extract_gpa_data(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
return {
|
| 108 |
'student_info': student_info,
|
| 109 |
'courses': {'All': courses},
|
|
|
|
| 112 |
}
|
| 113 |
|
| 114 |
def _parse_doral_academy(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
|
| 115 |
courses = []
|
|
|
|
|
|
|
| 116 |
student_info = {}
|
| 117 |
name_match = re.search(r'LEGAL NAME:\s*([^\n]+)', text)
|
| 118 |
if name_match:
|
| 119 |
student_info['name'] = name_match.group(1).strip()
|
| 120 |
|
|
|
|
| 121 |
year_pattern = re.compile(r'YEAR:\s*(\d{4}-\d{4})\s*GRADE LEVEL:\s*(\d{2})', re.MULTILINE)
|
| 122 |
year_matches = year_pattern.finditer(text)
|
| 123 |
|
|
|
|
| 124 |
grade_year_map = {}
|
| 125 |
for match in year_matches:
|
| 126 |
grade_year_map[match.group(2)] = match.group(1)
|
| 127 |
|
|
|
|
| 128 |
course_pattern = re.compile(
|
| 129 |
r'(\d)\s+(\d{7})\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+(\d\.\d{2})\s+(\d\.\d{2})',
|
| 130 |
re.MULTILINE
|
|
|
|
| 152 |
|
| 153 |
courses_by_grade[grade_level_num].append(course_info)
|
| 154 |
|
| 155 |
+
gpa_data = self._extract_gpa_data(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
grade_level = "12" if re.search(r'GRADE LEVEL:\s*12', text) else "Unknown"
|
| 157 |
|
| 158 |
return {
|
|
|
|
| 163 |
}
|
| 164 |
|
| 165 |
def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
|
| 166 |
courses = []
|
| 167 |
courses_by_grade = defaultdict(list)
|
| 168 |
|
|
|
|
| 169 |
student_info = {}
|
| 170 |
name_match = re.search(r'0783977 - ([^,]+),\s*([^\n]+)', text)
|
| 171 |
if name_match:
|
| 172 |
student_info['name'] = f"{name_match.group(2)} {name_match.group(1)}"
|
| 173 |
|
|
|
|
| 174 |
course_pattern = re.compile(
|
| 175 |
r'([A-Z]-[A-Za-z\s&]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([A-Z]?)\s*\|\s*([A-Z]?)\s*\|\s*([^\|]+)',
|
| 176 |
re.MULTILINE
|
|
|
|
| 196 |
|
| 197 |
courses_by_grade[match.group(3)].append(course_info)
|
| 198 |
|
| 199 |
+
gpa_data = self._extract_gpa_data(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
grade_level = re.search(r'Current Grade:\s*(\d+)', text).group(1) if re.search(r'Current Grade:\s*(\d+)', text) else "Unknown"
|
| 201 |
|
| 202 |
return {
|
|
|
|
| 206 |
'grade_level': grade_level
|
| 207 |
}
|
| 208 |
|
| 209 |
+
def _extract_gpa_data(self, text: str) -> Dict[str, str]:
    """Extract weighted and unweighted GPA values from transcript text.

    Patterns are tried in priority order for each GPA kind and the first
    pattern that matches anywhere in the text wins. Matching is
    case-insensitive. If neither a weighted nor an unweighted GPA is found,
    a cumulative GPA (if present) is reported under both keys.

    Args:
        text: Raw text extracted from a transcript. May be empty or None.

    Returns:
        A dict with any of the keys 'weighted' and 'unweighted' mapped to
        the GPA as it appears in the text (a string such as '3.75'). Keys
        whose value could not be located are omitted.
    """
    # Text extraction can yield nothing; re.search would raise on None.
    if not text:
        return {}

    # A plain decimal number. Unlike r'[\d\.]+', this cannot capture a bare
    # '.' or a trailing sentence period ("3.9." -> "3.9").
    number = r'(\d+(?:\.\d+)?)'
    # Label and value may be separated by whitespace, a colon, or a table
    # pipe (pipe-delimited transcripts).
    separator = r'\s*[:|]?\s*'
    # Guard so that "Weighted" is not matched inside "Unweighted",
    # "Un-weighted" or "Un weighted" (matching is case-insensitive).
    not_unweighted = r'(?<!un)(?<!un-)(?<!un )'

    weighted_patterns = (
        not_unweighted + r'Weighted GPA' + separator + number,
        r'GPA WTD' + separator + number,
        # Bare label: require an explicit separator to avoid matching prose
        # such as "weighted 4 courses".
        not_unweighted + r'Weighted\s*[:|]\s*' + number,
    )
    unweighted_patterns = (
        r'Un[-\s]?weighted GPA' + separator + number,
        r'GPA UNWTD' + separator + number,
        r'Un[-\s]?weighted\s*[:|]\s*' + number,
    )

    def first_match(patterns):
        # Return the captured GPA of the highest-priority matching pattern,
        # or None when no pattern matches.
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1)
        return None

    gpa_data = {}
    for key, patterns in (('weighted', weighted_patterns),
                          ('unweighted', unweighted_patterns)):
        value = first_match(patterns)
        if value is not None:
            gpa_data[key] = value

    # Fallback to cumulative GPA only if neither kind was found. Accepts
    # "Cumulative GPA: x" as well as the abbreviated "Cum. GPA | x" form.
    if not gpa_data:
        cumulative_match = re.search(
            r'\bCum(?:ulative|\.)?\s+GPA' + separator + number,
            text,
            re.IGNORECASE,
        )
        if cumulative_match:
            gpa_data['weighted'] = cumulative_match.group(1)
            gpa_data['unweighted'] = cumulative_match.group(1)

    return gpa_data
|
| 251 |
|
| 252 |
+
# ========== TRANSCRIPT PROCESSING ==========
|
| 253 |
def parse_transcript(file):
|
| 254 |
parser = UniversalTranscriptParser()
|
| 255 |
|
|
|
|
| 261 |
|
| 262 |
parsed_data = parser.parse_transcript(text)
|
| 263 |
|
| 264 |
+
# Enhanced GPA display
|
| 265 |
gpa_data = parsed_data.get('gpa', {})
|
| 266 |
+
weighted_gpa = gpa_data.get('weighted', 'Not Found (Please check transcript)')
|
| 267 |
+
unweighted_gpa = gpa_data.get('unweighted', 'Not Found (Please check transcript)')
|
| 268 |
|
| 269 |
+
output_text = "=== TRANSCRIPT ANALYSIS RESULTS ===\n\n"
|
| 270 |
+
output_text += "GPA INFORMATION:\n"
|
| 271 |
+
output_text += f"🔹 Weighted GPA: {weighted_gpa}\n"
|
| 272 |
+
output_text += f"🔹 Unweighted GPA: {unweighted_gpa}\n\n"
|
| 273 |
+
|
| 274 |
+
if 'Not Found' in weighted_gpa or 'Not Found' in unweighted_gpa:
|
| 275 |
+
output_text += "NOTE: Could not automatically locate GPA information.\n"
|
| 276 |
+
output_text += "Please check your transcript for GPA details and enter them manually if needed.\n"
|
| 277 |
|
| 278 |
return output_text, parsed_data
|
| 279 |
else:
|
|
|
|
| 282 |
# ========== LEARNING STYLE QUIZ ==========
|
| 283 |
learning_style_questions = [
|
| 284 |
"When you study for a test, you prefer to:",
|
| 285 |
+
# ... [rest of your questions] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
]
|
| 287 |
|
| 288 |
learning_style_options = [
|
| 289 |
["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
|
| 290 |
+
# ... [rest of your options] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
]
|
| 292 |
|
| 293 |
def learning_style_quiz(*answers):
|
|
|
|
| 311 |
max_score = max(scores.values())
|
| 312 |
total_questions = len(learning_style_questions)
|
| 313 |
|
|
|
|
| 314 |
percentages = {style: (score/total_questions)*100 for style, score in scores.items()}
|
|
|
|
|
|
|
| 315 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
| 316 |
|
|
|
|
| 317 |
result = "Your Learning Style Results:\n\n"
|
| 318 |
for style, score in sorted_styles:
|
| 319 |
result += f"{style}: {score}/{total_questions} ({percentages[style]:.1f}%)\n"
|
| 320 |
|
| 321 |
result += "\n"
|
|
|
|
|
|
|
| 322 |
primary_styles = [style for style, score in scores.items() if score == max_score]
|
| 323 |
|
| 324 |
if len(primary_styles) == 1:
|
| 325 |
result += f"Your primary learning style is: {primary_styles[0]}\n\n"
|
| 326 |
+
# ... [rest of your learning style tips] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
else:
|
| 328 |
result += f"You have multiple strong learning styles: {', '.join(primary_styles)}\n\n"
|
| 329 |
result += "You may benefit from combining different learning approaches.\n"
|
|
|
|
| 334 |
def save_profile(name, age, interests, transcript, learning_style,
|
| 335 |
movie, movie_reason, show, show_reason,
|
| 336 |
book, book_reason, character, character_reason, blog):
|
|
|
|
| 337 |
age = int(age) if age else 0
|
| 338 |
|
| 339 |
favorites = {
|
|
|
|
| 362 |
with open(json_path, "w") as f:
|
| 363 |
json.dump(data, f, indent=2)
|
| 364 |
|
|
|
|
| 365 |
gpa = transcript.get('gpa', {})
|
| 366 |
markdown_summary = f"""### Student Profile: {name}
|
| 367 |
**Age:** {age}
|
|
|
|
| 398 |
if not profile:
|
| 399 |
return "Please complete and save your profile first using the previous tabs."
|
| 400 |
|
|
|
|
|
|
|
| 401 |
transcript = profile.get("transcript", {})
|
| 402 |
gpa = transcript.get("gpa", {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
|
| 404 |
+
# When user asks about GPA
|
| 405 |
+
if any(word in message.lower() for word in ["gpa", "grade", "weighted", "unweighted"]):
|
|
|
|
|
|
|
| 406 |
response = "Your GPA Information:\n"
|
| 407 |
response += f"- Weighted GPA: {gpa.get('weighted', 'Not Available')}\n"
|
| 408 |
response += f"- Unweighted GPA: {gpa.get('unweighted', 'Not Available')}\n"
|
|
|
|
| 420 |
|
| 421 |
return response
|
| 422 |
|
| 423 |
+
# ... [rest of your AI assistant logic] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
# ========== GRADIO INTERFACE ==========
|
| 426 |
with gr.Blocks() as app:
|