Spaces:
Sleeping
Sleeping
Parthnuwal7 commited on
Commit ·
3d015cd
0
Parent(s):
Adding analytical content
Browse files- .gitattributes +35 -0
- .gitignore +28 -0
- Dockerfile +28 -0
- README.md +11 -0
- app.py +53 -0
- aspect_seeds.json +209 -0
- config.py +22 -0
- database/add_semester_columns.sql +9 -0
- database/db.py +10 -0
- database/migrate_domain_module.sql +113 -0
- database/migrate_to_text_fields.sql +17 -0
- database/schema.sql +151 -0
- domains/data_science.json +153 -0
- domains/mechanical_engineering.json +148 -0
- domains/software_engineering.json +162 -0
- models/personality_responses.py +48 -0
- models/student.py +42 -0
- models/text_responses.py +39 -0
- requirements.txt +12 -0
- routes/domain.py +202 -0
- routes/scoring.py +204 -0
- routes/students.py +173 -0
- services/README_text_v2.md +210 -0
- services/batch_aggregation.py +414 -0
- services/domain_knowledge_base.py +250 -0
- services/domain_plugins/__init__.py +10 -0
- services/domain_plugins/base_plugin.py +117 -0
- services/domain_plugins/business_plugin.py +205 -0
- services/domain_plugins/creative_plugin.py +210 -0
- services/domain_plugins/plugin_factory.py +73 -0
- services/domain_plugins/research_plugin.py +228 -0
- services/domain_plugins/tech_plugin.py +270 -0
- services/fidelity_transformer.py +481 -0
- services/fusion.py +150 -0
- services/personality_module.py +132 -0
- services/student_output.py +411 -0
- services/text_module.py +211 -0
- services/text_module_v2.py +576 -0
- services/universal_module.py +240 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment
|
| 2 |
+
.env
|
| 3 |
+
.env.example
|
| 4 |
+
.env.local
|
| 5 |
+
|
| 6 |
+
# Virtual environments
|
| 7 |
+
.venv/
|
| 8 |
+
venv/
|
| 9 |
+
env/
|
| 10 |
+
|
| 11 |
+
# Python cache
|
| 12 |
+
__pycache__/
|
| 13 |
+
*.py[cod]
|
| 14 |
+
*$py.class
|
| 15 |
+
*.pyo
|
| 16 |
+
|
| 17 |
+
# IDE
|
| 18 |
+
.idea/
|
| 19 |
+
.vscode/
|
| 20 |
+
*.swp
|
| 21 |
+
|
| 22 |
+
# OS
|
| 23 |
+
.DS_Store
|
| 24 |
+
Thumbs.db
|
| 25 |
+
|
| 26 |
+
# Cached centroids
|
| 27 |
+
*.npz
|
| 28 |
+
aspect_centroids.npz
|
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies
# (build-essential: presumably needed to compile native wheels from
#  requirements.txt — confirm before removing)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching (code changes won't invalidate
# the pip-install layer)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create directories for caching
RUN mkdir -p /app/cache

# HuggingFace Spaces uses port 7860
ENV PORT=7860
ENV PYTHONUNBUFFERED=1

# Expose the port
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
|
README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: FCT
|
| 3 |
+
emoji: 🦀
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: other
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Main Flask application for Analytics Module"""
from flask import Flask
from flask_cors import CORS
from config import Config

# Create and configure the Flask application.
app = Flask(__name__)
app.config.from_object(Config)
CORS(app)

# Blueprints are imported only after the app object exists
# (routes modules may import from this package at load time).
from routes.students import students_bp
from routes.scoring import scoring_bp
from routes.domain import domain_bp

# Every blueprint is mounted under the same /api/analytics prefix.
for _bp in (students_bp, scoring_bp, domain_bp):
    app.register_blueprint(_bp, url_prefix='/api/analytics')

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: report the service as healthy."""
    body = {'status': 'healthy', 'service': 'analytics-api'}
    return body, 200

@app.route('/', methods=['GET'])
def home():
    """Describe the API: service name, version, and endpoint map."""
    payload = {
        'service': 'Student Profiling & Employability Scoring API',
        'version': '1.0.0',
        'endpoints': {
            'students': '/api/analytics/students',
            'personality': '/api/analytics/personality/<student_id>',
            'text': '/api/analytics/text/<student_id>',
            'score': '/api/analytics/score/<student_id>',
            'leaderboard': '/api/analytics/leaderboard',
            'domain': {
                'available': '/api/analytics/domain/available',
                'submit': '/api/analytics/students/<student_id>/domain-evidence',
                'get': '/api/analytics/students/<student_id>/domain-evidence',
                'delete': '/api/analytics/students/<student_id>/domain-evidence/<domain_type>'
            }
        }
    }
    return payload

if __name__ == '__main__':
    import os

    # HuggingFace uses 7860; DEBUG accepts any casing of "true".
    port = int(os.getenv('PORT', 7860))
    debug = os.getenv('DEBUG', 'False').lower() == 'true'

    print(f"🚀 Analytics API starting on port {port}")
    print(f"📊 Scoring modules: Universal, Personality, Text, Domain (Tech/Business/Creative/Research)")
    print(f"🔗 Base URL: http://0.0.0.0:{port}")
    app.run(host='0.0.0.0', port=port, debug=debug)
|
aspect_seeds.json
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"leadership": [
|
| 3 |
+
"led a team",
|
| 4 |
+
"was team lead",
|
| 5 |
+
"managed a project",
|
| 6 |
+
"supervised interns",
|
| 7 |
+
"coordinated a cross-functional team",
|
| 8 |
+
"organized the club",
|
| 9 |
+
"president of the society",
|
| 10 |
+
"captain of the team",
|
| 11 |
+
"ran weekly standups",
|
| 12 |
+
"delegated tasks",
|
| 13 |
+
"mentored junior members",
|
| 14 |
+
"headed the project",
|
| 15 |
+
"oversaw project timelines",
|
| 16 |
+
"chaired the committee",
|
| 17 |
+
"led end-to-end delivery",
|
| 18 |
+
"directed project milestones",
|
| 19 |
+
"led a 5-person team",
|
| 20 |
+
"managed stakeholders",
|
| 21 |
+
"took ownership of the initiative",
|
| 22 |
+
"led code reviews",
|
| 23 |
+
"organized campus events",
|
| 24 |
+
"led product demo sessions",
|
| 25 |
+
"led recruitment for volunteers",
|
| 26 |
+
"managed vendor relationships",
|
| 27 |
+
"spearheaded the outreach program"
|
| 28 |
+
],
|
| 29 |
+
"technical_skills": [
|
| 30 |
+
"developed a web API",
|
| 31 |
+
"implemented RESTful services",
|
| 32 |
+
"coded in python",
|
| 33 |
+
"built machine learning models",
|
| 34 |
+
"trained neural networks",
|
| 35 |
+
"implemented data pipelines",
|
| 36 |
+
"used pandas for ETL",
|
| 37 |
+
"designed database schemas",
|
| 38 |
+
"built microservices",
|
| 39 |
+
"deployed models using docker",
|
| 40 |
+
"worked with FastAPI",
|
| 41 |
+
"implemented CI/CD",
|
| 42 |
+
"wrote unit tests",
|
| 43 |
+
"optimized SQL queries",
|
| 44 |
+
"used scikit-learn",
|
| 45 |
+
"developed recommendation systems",
|
| 46 |
+
"built feature engineering pipelines",
|
| 47 |
+
"deployed to cloud",
|
| 48 |
+
"developed ETL jobs",
|
| 49 |
+
"worked with Kafka",
|
| 50 |
+
"implemented caching layers",
|
| 51 |
+
"used TensorFlow or PyTorch",
|
| 52 |
+
"built backend services",
|
| 53 |
+
"wrote production-grade code",
|
| 54 |
+
"integrated third-party APIs"
|
| 55 |
+
],
|
| 56 |
+
"problem_solving": [
|
| 57 |
+
"solved complex problem",
|
| 58 |
+
"debugged production issues",
|
| 59 |
+
"optimized an algorithm",
|
| 60 |
+
"reduced latency of service",
|
| 61 |
+
"designed a scalable solution",
|
| 62 |
+
"investigated root cause",
|
| 63 |
+
"improved system reliability",
|
| 64 |
+
"created a novel solution",
|
| 65 |
+
"troubleshot integration issues",
|
| 66 |
+
"automated manual tasks",
|
| 67 |
+
"reduced memory usage",
|
| 68 |
+
"resolved data pipeline failures",
|
| 69 |
+
"refactored critical code",
|
| 70 |
+
"handled edge cases",
|
| 71 |
+
"iterated on prototypes",
|
| 72 |
+
"performed A/B testing to decide",
|
| 73 |
+
"diagnosed performance bottlenecks",
|
| 74 |
+
"designed fallback strategies",
|
| 75 |
+
"resolved deployment failures",
|
| 76 |
+
"created monitoring & alerts"
|
| 77 |
+
],
|
| 78 |
+
"internships_experience": [
|
| 79 |
+
"summer internship",
|
| 80 |
+
"industrial training",
|
| 81 |
+
"interned at",
|
| 82 |
+
"worked as an intern",
|
| 83 |
+
"internship project",
|
| 84 |
+
"internship in data science",
|
| 85 |
+
"interned at a startup",
|
| 86 |
+
"completed internship at",
|
| 87 |
+
"interned with the engineering team",
|
| 88 |
+
"intern experience",
|
| 89 |
+
"interned at an e-commerce company",
|
| 90 |
+
"industrial internship",
|
| 91 |
+
"co-op placement",
|
| 92 |
+
"paid internship",
|
| 93 |
+
"research internship",
|
| 94 |
+
"interned as a software engineer",
|
| 95 |
+
"on-the-job training",
|
| 96 |
+
"worked under mentor",
|
| 97 |
+
"internship-driven project",
|
| 98 |
+
"corporate internship"
|
| 99 |
+
],
|
| 100 |
+
"communication": [
|
| 101 |
+
"presented to stakeholders",
|
| 102 |
+
"gave a presentation",
|
| 103 |
+
"wrote documentation",
|
| 104 |
+
"authored reports",
|
| 105 |
+
"explained results to non-technical",
|
| 106 |
+
"public speaking",
|
| 107 |
+
"delivered demo",
|
| 108 |
+
"prepared slides",
|
| 109 |
+
"wrote user guides",
|
| 110 |
+
"communicated with clients",
|
| 111 |
+
"collaborated across teams",
|
| 112 |
+
"conducted knowledge transfer",
|
| 113 |
+
"wrote clear emails",
|
| 114 |
+
"explained technical concepts",
|
| 115 |
+
"presented project outcomes",
|
| 116 |
+
"led demo sessions",
|
| 117 |
+
"created onboarding docs",
|
| 118 |
+
"contributed to team discussions",
|
| 119 |
+
"led workshops",
|
| 120 |
+
"hosted training sessions"
|
| 121 |
+
],
|
| 122 |
+
"teamwork": [
|
| 123 |
+
"collaborated with team",
|
| 124 |
+
"worked in a cross-functional team",
|
| 125 |
+
"paired programming",
|
| 126 |
+
"contributed to group project",
|
| 127 |
+
"supported teammates",
|
| 128 |
+
"collaborated on design",
|
| 129 |
+
"worked with designers and PMs",
|
| 130 |
+
"helped teammates debug",
|
| 131 |
+
"co-authored project",
|
| 132 |
+
"mentored peers",
|
| 133 |
+
"shared responsibilities",
|
| 134 |
+
"worked effectively in group",
|
| 135 |
+
"contributed in agile team",
|
| 136 |
+
"participated in sprints",
|
| 137 |
+
"assisted in integration"
|
| 138 |
+
],
|
| 139 |
+
"project_execution": [
|
| 140 |
+
"delivered project on time",
|
| 141 |
+
"met project deadlines",
|
| 142 |
+
"managed milestones",
|
| 143 |
+
"handled project planning",
|
| 144 |
+
"released production features",
|
| 145 |
+
"coordinated deployment",
|
| 146 |
+
"delivered MVP",
|
| 147 |
+
"tracked KPIs",
|
| 148 |
+
"managed scope",
|
| 149 |
+
"created project timeline",
|
| 150 |
+
"ran retrospectives",
|
| 151 |
+
"managed feature rollout",
|
| 152 |
+
"ensured on-time delivery",
|
| 153 |
+
"performed release validations",
|
| 154 |
+
"deployed analytics dashboard",
|
| 155 |
+
"iterated based on feedback"
|
| 156 |
+
],
|
| 157 |
+
"initiative": [
|
| 158 |
+
"initiated a project",
|
| 159 |
+
"proposed a new idea",
|
| 160 |
+
"took initiative",
|
| 161 |
+
"started a side project",
|
| 162 |
+
"built a proof of concept",
|
| 163 |
+
"started a campus chapter",
|
| 164 |
+
"created an automation",
|
| 165 |
+
"improved an existing process",
|
| 166 |
+
"volunteered to lead",
|
| 167 |
+
"identified improvement areas",
|
| 168 |
+
"launched a mini-product",
|
| 169 |
+
"ran a pilot program",
|
| 170 |
+
"created onboarding scripts",
|
| 171 |
+
"led process improvements",
|
| 172 |
+
"started a mentoring circle"
|
| 173 |
+
],
|
| 174 |
+
"learning_agility": [
|
| 175 |
+
"quick learner",
|
| 176 |
+
"self-taught",
|
| 177 |
+
"learned new framework",
|
| 178 |
+
"picked up new language",
|
| 179 |
+
"adapted to new tech",
|
| 180 |
+
"completed online courses",
|
| 181 |
+
"upskilled via projects",
|
| 182 |
+
"transitioned domains",
|
| 183 |
+
"learned on the job",
|
| 184 |
+
"rapidly onboarded",
|
| 185 |
+
"attended workshops",
|
| 186 |
+
"completed bootcamp",
|
| 187 |
+
"took certification courses",
|
| 188 |
+
"learned through documentation",
|
| 189 |
+
"scaled knowledge quickly",
|
| 190 |
+
"adapted to changing scope"
|
| 191 |
+
],
|
| 192 |
+
"career_alignment": [
|
| 193 |
+
"career goal is",
|
| 194 |
+
"aspire to become",
|
| 195 |
+
"interested in data science",
|
| 196 |
+
"pursue a role in product",
|
| 197 |
+
"long-term goal",
|
| 198 |
+
"want to specialize in",
|
| 199 |
+
"career objective",
|
| 200 |
+
"planning to pursue masters",
|
| 201 |
+
"aim to work in industry",
|
| 202 |
+
"seek product management roles",
|
| 203 |
+
"interested in research",
|
| 204 |
+
"want to join a startup",
|
| 205 |
+
"targeting roles in ML engineering",
|
| 206 |
+
"aiming for consulting roles",
|
| 207 |
+
"career path is focused on"
|
| 208 |
+
]
|
| 209 |
+
}
|
config.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration for Analytics Module"""
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
class Config:
    """Central configuration, read from environment variables at import time."""

    # Supabase connection.
    # NOTE(review): SUPABASE_URL falls back to a hard-coded project endpoint;
    # SUPABASE_KEY deliberately defaults to '' so a missing key fails at the
    # first API call rather than masking a misconfiguration here.
    SUPABASE_URL = os.getenv('SUPABASE_URL', 'https://hbesjuifeodgqrptpkch.supabase.co')
    SUPABASE_KEY = os.getenv('SUPABASE_KEY', '')

    # ML Models
    SENTENCE_TRANSFORMER_MODEL = 'all-MiniLM-L6-v2'

    # Scoring Weights (the three module weights sum to 1.0)
    UNIVERSAL_WEIGHT = 0.50
    PERSONALITY_WEIGHT = 0.25
    TEXT_WEIGHT = 0.25

    # Flask
    # Case-insensitive parse so DEBUG=true / TRUE / True all enable debug mode,
    # matching how app.py interprets the same variable (previously this was a
    # case-sensitive == 'True' comparison, silently treating 'true' as False).
    DEBUG = os.getenv('DEBUG', 'True').lower() == 'true'
    PORT = int(os.getenv('PORT', 5001))
|
database/add_semester_columns.sql
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Add missing semester columns to analytics_students table
-- Run this in Supabase SQL Editor
-- Each SGPA is constrained to the 0..10 scale, consistent with the
-- sgpa_sem* CHECKs in schema.sql. IF NOT EXISTS makes this re-runnable.

ALTER TABLE analytics_students
ADD COLUMN IF NOT EXISTS sgpa_sem1 REAL CHECK (sgpa_sem1 >= 0 AND sgpa_sem1 <= 10),
ADD COLUMN IF NOT EXISTS sgpa_sem2 REAL CHECK (sgpa_sem2 >= 0 AND sgpa_sem2 <= 10),
ADD COLUMN IF NOT EXISTS sgpa_sem3 REAL CHECK (sgpa_sem3 >= 0 AND sgpa_sem3 <= 10),
ADD COLUMN IF NOT EXISTS sgpa_sem7 REAL CHECK (sgpa_sem7 >= 0 AND sgpa_sem7 <= 10),
ADD COLUMN IF NOT EXISTS sgpa_sem8 REAL CHECK (sgpa_sem8 >= 0 AND sgpa_sem8 <= 10);
|
database/db.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Database connection and utilities"""
from supabase import create_client, Client

from config import Config

# One module-level Supabase client, created at import time and shared
# by every caller via get_db().
supabase: Client = create_client(Config.SUPABASE_URL, Config.SUPABASE_KEY)


def get_db() -> Client:
    """Return the shared Supabase client instance."""
    return supabase
|
database/migrate_domain_module.sql
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Domain Module Migration Script
|
| 2 |
+
-- Run this in Supabase SQL Editor to add domain-specific scoring support
|
| 3 |
+
-- Date: December 9, 2025
|
| 4 |
+
|
| 5 |
+
-- ============================================================================
|
| 6 |
+
-- STEP 1: Add domain fields to existing analytics_students table
|
| 7 |
+
-- ============================================================================
|
| 8 |
+
|
| 9 |
+
-- Add per-student domain fields. A NULL active_domain means "no domain
-- chosen" and already satisfies the CHECK (a NULL CHECK result is not a
-- violation). The previous constraint listed NULL inside IN (...), which is
-- a no-op — `x IN (..., NULL)` never evaluates TRUE for NULL x — so NULL is
-- dropped from the list for clarity; accepted values are unchanged.
ALTER TABLE analytics_students
ADD COLUMN IF NOT EXISTS active_domain TEXT CHECK (active_domain IN ('tech', 'business', 'creative', 'research')),
ADD COLUMN IF NOT EXISTS domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
ADD COLUMN IF NOT EXISTS domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1);
|
| 13 |
+
|
| 14 |
+
-- ============================================================================
|
| 15 |
+
-- STEP 2: Create domain evidence table
|
| 16 |
+
-- ============================================================================
|
| 17 |
+
|
| 18 |
+
CREATE TABLE IF NOT EXISTS analytics_domain_evidence (
|
| 19 |
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
| 20 |
+
student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
|
| 21 |
+
domain_type TEXT NOT NULL CHECK (domain_type IN ('tech', 'business', 'creative', 'research')),
|
| 22 |
+
evidence_data JSONB NOT NULL,
|
| 23 |
+
domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
|
| 24 |
+
domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1),
|
| 25 |
+
raw_features JSONB,
|
| 26 |
+
processing_status TEXT DEFAULT 'pending' CHECK (processing_status IN ('pending', 'processing', 'completed', 'failed')),
|
| 27 |
+
error_message TEXT,
|
| 28 |
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
| 29 |
+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
| 30 |
+
UNIQUE(student_id, domain_type)
|
| 31 |
+
);
|
| 32 |
+
|
| 33 |
+
-- ============================================================================
|
| 34 |
+
-- STEP 3: Create indexes for performance
|
| 35 |
+
-- ============================================================================
|
| 36 |
+
|
| 37 |
+
CREATE INDEX IF NOT EXISTS idx_domain_evidence_student ON analytics_domain_evidence(student_id);
|
| 38 |
+
CREATE INDEX IF NOT EXISTS idx_domain_evidence_type ON analytics_domain_evidence(domain_type);
|
| 39 |
+
CREATE INDEX IF NOT EXISTS idx_domain_evidence_status ON analytics_domain_evidence(processing_status);
|
| 40 |
+
|
| 41 |
+
-- ============================================================================
|
| 42 |
+
-- STEP 4: Enable Row Level Security
|
| 43 |
+
-- ============================================================================
|
| 44 |
+
|
| 45 |
+
ALTER TABLE analytics_domain_evidence ENABLE ROW LEVEL SECURITY;
|
| 46 |
+
|
| 47 |
+
-- ============================================================================
|
| 48 |
+
-- STEP 5: Create RLS Policies for domain evidence
|
| 49 |
+
-- ============================================================================
|
| 50 |
+
|
| 51 |
+
-- Users can view their own domain evidence
|
| 52 |
+
CREATE POLICY "Users can view own domain evidence"
|
| 53 |
+
ON analytics_domain_evidence FOR SELECT
|
| 54 |
+
TO authenticated
|
| 55 |
+
USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 56 |
+
|
| 57 |
+
-- Users can insert their own domain evidence
|
| 58 |
+
CREATE POLICY "Users can insert own domain evidence"
|
| 59 |
+
ON analytics_domain_evidence FOR INSERT
|
| 60 |
+
TO authenticated
|
| 61 |
+
WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 62 |
+
|
| 63 |
+
-- Users can update their own domain evidence
|
| 64 |
+
CREATE POLICY "Users can update own domain evidence"
|
| 65 |
+
ON analytics_domain_evidence FOR UPDATE
|
| 66 |
+
TO authenticated
|
| 67 |
+
USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 68 |
+
|
| 69 |
+
-- Users can delete their own domain evidence
|
| 70 |
+
CREATE POLICY "Users can delete own domain evidence"
|
| 71 |
+
ON analytics_domain_evidence FOR DELETE
|
| 72 |
+
TO authenticated
|
| 73 |
+
USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 74 |
+
|
| 75 |
+
-- ============================================================================
|
| 76 |
+
-- STEP 6: Verification queries (run these to verify successful migration)
|
| 77 |
+
-- ============================================================================
|
| 78 |
+
|
| 79 |
+
-- Check if columns were added
|
| 80 |
+
SELECT column_name, data_type
|
| 81 |
+
FROM information_schema.columns
|
| 82 |
+
WHERE table_name = 'analytics_students'
|
| 83 |
+
AND column_name IN ('active_domain', 'domain_score', 'domain_confidence');
|
| 84 |
+
|
| 85 |
+
-- Check if table was created
|
| 86 |
+
SELECT table_name
|
| 87 |
+
FROM information_schema.tables
|
| 88 |
+
WHERE table_name = 'analytics_domain_evidence';
|
| 89 |
+
|
| 90 |
+
-- Check if indexes were created
|
| 91 |
+
SELECT indexname
|
| 92 |
+
FROM pg_indexes
|
| 93 |
+
WHERE tablename = 'analytics_domain_evidence';
|
| 94 |
+
|
| 95 |
+
-- Check if RLS policies were created
|
| 96 |
+
SELECT policyname
|
| 97 |
+
FROM pg_policies
|
| 98 |
+
WHERE tablename = 'analytics_domain_evidence';
|
| 99 |
+
|
| 100 |
+
-- ============================================================================
|
| 101 |
+
-- Migration Complete!
|
| 102 |
+
-- ============================================================================
|
| 103 |
+
|
| 104 |
+
-- Expected results:
|
| 105 |
+
-- ✓ 3 new columns in analytics_students table
|
| 106 |
+
-- ✓ 1 new table: analytics_domain_evidence
|
| 107 |
+
-- ✓ 3 new indexes
|
| 108 |
+
-- ✓ 4 new RLS policies
|
| 109 |
+
|
| 110 |
+
-- Next steps:
|
| 111 |
+
-- 1. Restart your Flask backend: python app.py
|
| 112 |
+
-- 2. Test domain submission via API or frontend form
|
| 113 |
+
-- 3. Verify score fusion includes domain component
|
database/migrate_to_text_fields.sql
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Migration: Replace numeric fields with text fields for extracurricular, certifications, and internships
-- Run this in Supabase SQL Editor

-- Add new text columns (IF NOT EXISTS makes the migration safely re-runnable)
ALTER TABLE analytics_students
ADD COLUMN IF NOT EXISTS extracurricular_text TEXT,
ADD COLUMN IF NOT EXISTS certifications_text TEXT,
ADD COLUMN IF NOT EXISTS internship_text TEXT;

-- Optional: Drop old numeric columns if you want to clean up
-- Uncomment these lines after verifying the new text fields work
-- ALTER TABLE analytics_students DROP COLUMN IF EXISTS extracurricular_count;
-- ALTER TABLE analytics_students DROP COLUMN IF EXISTS certifications_count;
-- ALTER TABLE analytics_students DROP COLUMN IF EXISTS internship_total_months;

-- Note: If you want to keep both old and new columns during transition,
-- you can skip dropping the old columns and they will coexist.
|
database/schema.sql
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Analytics Module Schema for Supabase
|
| 2 |
+
-- Run this in Supabase SQL Editor
|
| 3 |
+
|
| 4 |
+
-- 1. Students Table
-- One row per student; user_id ties the row to Supabase auth.
-- SGPA/CGPA use a 0..10 scale; school percentages use 0..100.
CREATE TABLE IF NOT EXISTS analytics_students (
    student_id TEXT PRIMARY KEY,
    user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
    cgpa REAL NOT NULL CHECK (cgpa >= 0 AND cgpa <= 10),
    sgpa_sem1 REAL CHECK (sgpa_sem1 >= 0 AND sgpa_sem1 <= 10),
    sgpa_sem2 REAL CHECK (sgpa_sem2 >= 0 AND sgpa_sem2 <= 10),
    sgpa_sem3 REAL CHECK (sgpa_sem3 >= 0 AND sgpa_sem3 <= 10),
    sgpa_sem4 REAL CHECK (sgpa_sem4 >= 0 AND sgpa_sem4 <= 10),
    sgpa_sem5 REAL CHECK (sgpa_sem5 >= 0 AND sgpa_sem5 <= 10),
    sgpa_sem6 REAL CHECK (sgpa_sem6 >= 0 AND sgpa_sem6 <= 10),
    sgpa_sem7 REAL CHECK (sgpa_sem7 >= 0 AND sgpa_sem7 <= 10),
    sgpa_sem8 REAL CHECK (sgpa_sem8 >= 0 AND sgpa_sem8 <= 10),
    tenth_pct REAL CHECK (tenth_pct >= 0 AND tenth_pct <= 100),
    twelfth_pct REAL CHECK (twelfth_pct >= 0 AND twelfth_pct <= 100),
    extracurricular_text TEXT,
    certifications_text TEXT,
    internship_text TEXT,
    -- NULL active_domain ("no domain chosen") already satisfies the CHECK;
    -- listing NULL inside IN (...) was a no-op (`x IN (..., NULL)` never
    -- evaluates TRUE for NULL x) and has been removed. Accepted values
    -- are unchanged.
    active_domain TEXT CHECK (active_domain IN ('tech', 'business', 'creative', 'research')),
    domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
    domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
|
| 28 |
+
|
| 29 |
+
-- 2. Personality Responses Table
|
| 30 |
+
CREATE TABLE IF NOT EXISTS analytics_personality_responses (
|
| 31 |
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
| 32 |
+
student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
|
| 33 |
+
p_q1 INTEGER CHECK (p_q1 >= 1 AND p_q1 <= 5),
|
| 34 |
+
p_q2 INTEGER CHECK (p_q2 >= 1 AND p_q2 <= 5),
|
| 35 |
+
p_q3 INTEGER CHECK (p_q3 >= 1 AND p_q3 <= 5),
|
| 36 |
+
p_q4 INTEGER CHECK (p_q4 >= 1 AND p_q4 <= 5),
|
| 37 |
+
p_q5 INTEGER CHECK (p_q5 >= 1 AND p_q5 <= 5),
|
| 38 |
+
p_q6 INTEGER CHECK (p_q6 >= 1 AND p_q6 <= 5),
|
| 39 |
+
p_q7 INTEGER CHECK (p_q7 >= 1 AND p_q7 <= 5),
|
| 40 |
+
p_q8 INTEGER CHECK (p_q8 >= 1 AND p_q8 <= 5),
|
| 41 |
+
p_q9 INTEGER CHECK (p_q9 >= 1 AND p_q9 <= 5),
|
| 42 |
+
p_q10 INTEGER CHECK (p_q10 >= 1 AND p_q10 <= 5),
|
| 43 |
+
p_q11 INTEGER CHECK (p_q11 >= 1 AND p_q11 <= 5),
|
| 44 |
+
p_q12 INTEGER CHECK (p_q12 >= 1 AND p_q12 <= 5),
|
| 45 |
+
p_q13 INTEGER CHECK (p_q13 >= 1 AND p_q13 <= 5),
|
| 46 |
+
p_q14 INTEGER CHECK (p_q14 >= 1 AND p_q14 <= 5),
|
| 47 |
+
p_q15 INTEGER CHECK (p_q15 >= 1 AND p_q15 <= 5),
|
| 48 |
+
p_q16 INTEGER CHECK (p_q16 >= 1 AND p_q16 <= 5),
|
| 49 |
+
p_q17 INTEGER CHECK (p_q17 >= 1 AND p_q17 <= 5),
|
| 50 |
+
p_q18 INTEGER CHECK (p_q18 >= 1 AND p_q18 <= 5),
|
| 51 |
+
p_q19 INTEGER CHECK (p_q19 >= 1 AND p_q19 <= 5),
|
| 52 |
+
p_q20 INTEGER CHECK (p_q20 >= 1 AND p_q20 <= 5),
|
| 53 |
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
| 54 |
+
UNIQUE(student_id)
|
| 55 |
+
);
|
| 56 |
+
|
| 57 |
+
-- 3. Text Responses Table
|
| 58 |
+
CREATE TABLE IF NOT EXISTS analytics_text_responses (
|
| 59 |
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
| 60 |
+
student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
|
| 61 |
+
text_q1 TEXT NOT NULL, -- Strengths
|
| 62 |
+
text_q2 TEXT NOT NULL, -- Career interests
|
| 63 |
+
text_q3 TEXT NOT NULL, -- Extracurriculars + leadership
|
| 64 |
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
| 65 |
+
UNIQUE(student_id)
|
| 66 |
+
);
|
| 67 |
+
|
| 68 |
+
-- 4. Domain Evidence Table
|
| 69 |
+
CREATE TABLE IF NOT EXISTS analytics_domain_evidence (
|
| 70 |
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
| 71 |
+
student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
|
| 72 |
+
domain_type TEXT NOT NULL CHECK (domain_type IN ('tech', 'business', 'creative', 'research')),
|
| 73 |
+
evidence_data JSONB NOT NULL, -- Flexible storage for domain-specific inputs
|
| 74 |
+
domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
|
| 75 |
+
domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1),
|
| 76 |
+
raw_features JSONB, -- Raw feature values for explainability
|
| 77 |
+
processing_status TEXT DEFAULT 'pending' CHECK (processing_status IN ('pending', 'processing', 'completed', 'failed')),
|
| 78 |
+
error_message TEXT,
|
| 79 |
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
| 80 |
+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
| 81 |
+
UNIQUE(student_id, domain_type)
|
| 82 |
+
);
|
| 83 |
+
|
| 84 |
+
-- Indexes
|
| 85 |
+
CREATE INDEX IF NOT EXISTS idx_analytics_students_user_id ON analytics_students(user_id);
|
| 86 |
+
CREATE INDEX IF NOT EXISTS idx_personality_student ON analytics_personality_responses(student_id);
|
| 87 |
+
CREATE INDEX IF NOT EXISTS idx_text_student ON analytics_text_responses(student_id);
|
| 88 |
+
CREATE INDEX IF NOT EXISTS idx_domain_evidence_student ON analytics_domain_evidence(student_id);
|
| 89 |
+
CREATE INDEX IF NOT EXISTS idx_domain_evidence_type ON analytics_domain_evidence(domain_type);
|
| 90 |
+
CREATE INDEX IF NOT EXISTS idx_domain_evidence_status ON analytics_domain_evidence(processing_status);
|
| 91 |
+
|
| 92 |
+
-- RLS Policies
|
| 93 |
+
ALTER TABLE analytics_students ENABLE ROW LEVEL SECURITY;
|
| 94 |
+
ALTER TABLE analytics_personality_responses ENABLE ROW LEVEL SECURITY;
|
| 95 |
+
ALTER TABLE analytics_text_responses ENABLE ROW LEVEL SECURITY;
|
| 96 |
+
|
| 97 |
+
-- Students can view/update their own data
|
| 98 |
+
CREATE POLICY "Users can view own analytics data"
|
| 99 |
+
ON analytics_students FOR SELECT
|
| 100 |
+
TO authenticated
|
| 101 |
+
USING (user_id = auth.uid());
|
| 102 |
+
|
| 103 |
+
CREATE POLICY "Users can insert own analytics data"
|
| 104 |
+
ON analytics_students FOR INSERT
|
| 105 |
+
TO authenticated
|
| 106 |
+
WITH CHECK (user_id = auth.uid());
|
| 107 |
+
|
| 108 |
+
CREATE POLICY "Users can update own analytics data"
|
| 109 |
+
ON analytics_students FOR UPDATE
|
| 110 |
+
TO authenticated
|
| 111 |
+
USING (user_id = auth.uid());
|
| 112 |
+
|
| 113 |
+
-- Personality responses
|
| 114 |
+
CREATE POLICY "Users can view own personality responses"
|
| 115 |
+
ON analytics_personality_responses FOR SELECT
|
| 116 |
+
TO authenticated
|
| 117 |
+
USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 118 |
+
|
| 119 |
+
-- Personality responses: INSERT policy.
-- BUGFIX: the original statement was truncated after "FOR INSERT" (no TO role and
-- no WITH CHECK clause), which is invalid SQL and would leave the policy undefined.
-- Completed to mirror the other per-student INSERT policies in this schema.
CREATE POLICY "Users can insert own personality responses"
    ON analytics_personality_responses FOR INSERT
    TO authenticated
    WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
-- Text responses
|
| 122 |
+
CREATE POLICY "Users can view own text responses"
|
| 123 |
+
ON analytics_text_responses FOR SELECT
|
| 124 |
+
TO authenticated
|
| 125 |
+
USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 126 |
+
|
| 127 |
+
CREATE POLICY "Users can insert own text responses"
|
| 128 |
+
ON analytics_text_responses FOR INSERT
|
| 129 |
+
TO authenticated
|
| 130 |
+
WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 131 |
+
|
| 132 |
+
-- Domain evidence
|
| 133 |
+
ALTER TABLE analytics_domain_evidence ENABLE ROW LEVEL SECURITY;
|
| 134 |
+
|
| 135 |
+
CREATE POLICY "Users can view own domain evidence"
|
| 136 |
+
ON analytics_domain_evidence FOR SELECT
|
| 137 |
+
TO authenticated
|
| 138 |
+
USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 139 |
+
|
| 140 |
+
CREATE POLICY "Users can insert own domain evidence"
|
| 141 |
+
ON analytics_domain_evidence FOR INSERT
|
| 142 |
+
TO authenticated
|
| 143 |
+
WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
|
| 144 |
+
|
| 145 |
+
-- Domain evidence: UPDATE policy (students may only update rows tied to their own account).
CREATE POLICY "Users can update own domain evidence"
    ON analytics_domain_evidence FOR UPDATE
    TO authenticated
    USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
-- BUGFIX: removed a stray trailing fragment ("ON analytics_text_responses FOR INSERT ...
-- WITH CHECK (...)") that had no CREATE POLICY header. It duplicated the text-responses
-- INSERT policy already defined earlier in this file and was a syntax error here.
|
domains/data_science.json
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"domain_id": "data_science",
|
| 3 |
+
"display_name": "Data Science & Analytics",
|
| 4 |
+
"description": "Machine Learning, Data Analysis, AI Research, and Business Intelligence",
|
| 5 |
+
"core_skills": [
|
| 6 |
+
"python",
|
| 7 |
+
"r",
|
| 8 |
+
"sql",
|
| 9 |
+
"pandas",
|
| 10 |
+
"numpy",
|
| 11 |
+
"scikit_learn",
|
| 12 |
+
"tensorflow",
|
| 13 |
+
"pytorch",
|
| 14 |
+
"keras",
|
| 15 |
+
"xgboost",
|
| 16 |
+
"tableau",
|
| 17 |
+
"power_bi",
|
| 18 |
+
"matplotlib",
|
| 19 |
+
"seaborn",
|
| 20 |
+
"statistics",
|
| 21 |
+
"ab_testing",
|
| 22 |
+
"feature_engineering",
|
| 23 |
+
"spark",
|
| 24 |
+
"hadoop",
|
| 25 |
+
"airflow",
|
| 26 |
+
"dbt"
|
| 27 |
+
],
|
| 28 |
+
"aspect_prototypes": {
|
| 29 |
+
"technical_skills": [
|
| 30 |
+
"built machine learning models using scikit-learn and XGBoost",
|
| 31 |
+
"developed deep learning pipelines with PyTorch",
|
| 32 |
+
"created ETL jobs using PySpark for big data processing",
|
| 33 |
+
"trained neural networks for image classification",
|
| 34 |
+
"implemented NLP models using transformers and BERT",
|
| 35 |
+
"designed feature engineering pipelines for ML",
|
| 36 |
+
"built recommendation systems using collaborative filtering",
|
| 37 |
+
"deployed ML models to production with MLflow",
|
| 38 |
+
"created interactive dashboards in Tableau",
|
| 39 |
+
"performed A/B testing with statistical significance analysis"
|
| 40 |
+
],
|
| 41 |
+
"problem_solving": [
|
| 42 |
+
"improved model accuracy from 78% to 92% through feature engineering",
|
| 43 |
+
"reduced model training time by 60% using distributed computing",
|
| 44 |
+
"diagnosed and fixed data leakage in ML pipeline",
|
| 45 |
+
"optimized hyperparameters using Bayesian optimization",
|
| 46 |
+
"handled class imbalance with SMOTE and weighted sampling",
|
| 47 |
+
"debugged data quality issues affecting model performance",
|
| 48 |
+
"designed experiment to measure causal impact of recommendation",
|
| 49 |
+
"created automated anomaly detection system",
|
| 50 |
+
"resolved data drift issues in production models",
|
| 51 |
+
"built interpretable models for regulatory compliance"
|
| 52 |
+
],
|
| 53 |
+
"leadership": [
|
| 54 |
+
"led data science team of 4 on personalization project",
|
| 55 |
+
"presented ML insights to C-level stakeholders",
|
| 56 |
+
"coordinated with engineering for model deployment",
|
| 57 |
+
"organized data science reading group in company",
|
| 58 |
+
"mentored junior analysts on SQL and Python",
|
| 59 |
+
"drove adoption of MLOps best practices",
|
| 60 |
+
"led cross-functional project with marketing team",
|
| 61 |
+
"managed data labeling team for annotation project",
|
| 62 |
+
"conducted training sessions on Pandas for analysts",
|
| 63 |
+
"championed experiment-driven decision making culture"
|
| 64 |
+
],
|
| 65 |
+
"internship_experience": [
|
| 66 |
+
"data science intern at Flipkart building recommendation models",
|
| 67 |
+
"ML research intern at Google Brain working on NLP",
|
| 68 |
+
"analytics intern at McKinsey for retail optimization",
|
| 69 |
+
"AI intern at NVIDIA on computer vision projects",
|
| 70 |
+
"research intern at IISc on deep learning",
|
| 71 |
+
"data analyst intern at Zomato for demand forecasting",
|
| 72 |
+
"business intelligence intern at Amazon building dashboards",
|
| 73 |
+
"ML platform intern at Meta for model serving",
|
| 74 |
+
"quantitative research intern at Goldman Sachs",
|
| 75 |
+
"applied scientist intern at AWS on personalization"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
"industry_benchmarks": {
|
| 79 |
+
"min_employability_score": 0.65,
|
| 80 |
+
"expected_cgpa": 8.0,
|
| 81 |
+
"expected_internship_months": 4,
|
| 82 |
+
"critical_skills": [
|
| 83 |
+
"python",
|
| 84 |
+
"sql",
|
| 85 |
+
"statistics",
|
| 86 |
+
"ml_fundamentals"
|
| 87 |
+
],
|
| 88 |
+
"nice_to_have_skills": [
|
| 89 |
+
"deep_learning",
|
| 90 |
+
"spark",
|
| 91 |
+
"mlops",
|
| 92 |
+
"cloud"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
"skill_gaps_mapping": {
|
| 96 |
+
"deep_learning": {
|
| 97 |
+
"demand_score": 0.80,
|
| 98 |
+
"courses": [
|
| 99 |
+
"Deep Learning Specialization",
|
| 100 |
+
"Fast.ai",
|
| 101 |
+
"Stanford CS231n"
|
| 102 |
+
],
|
| 103 |
+
"certifications": [
|
| 104 |
+
"TensorFlow Developer",
|
| 105 |
+
"PyTorch Certified"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
"mlops": {
|
| 109 |
+
"demand_score": 0.75,
|
| 110 |
+
"courses": [
|
| 111 |
+
"MLOps Specialization",
|
| 112 |
+
"ML Engineering for Production"
|
| 113 |
+
],
|
| 114 |
+
"certifications": [
|
| 115 |
+
"AWS ML Specialty",
|
| 116 |
+
"GCP ML Engineer"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
"statistics": {
|
| 120 |
+
"demand_score": 0.70,
|
| 121 |
+
"courses": [
|
| 122 |
+
"Statistics for Data Science",
|
| 123 |
+
"A/B Testing Masterclass"
|
| 124 |
+
],
|
| 125 |
+
"certifications": []
|
| 126 |
+
},
|
| 127 |
+
"big_data": {
|
| 128 |
+
"demand_score": 0.65,
|
| 129 |
+
"courses": [
|
| 130 |
+
"Spark for Data Engineering",
|
| 131 |
+
"Databricks Academy"
|
| 132 |
+
],
|
| 133 |
+
"certifications": [
|
| 134 |
+
"Databricks Certified",
|
| 135 |
+
"Cloudera CCA"
|
| 136 |
+
]
|
| 137 |
+
}
|
| 138 |
+
},
|
| 139 |
+
"detection_keywords": [
|
| 140 |
+
"data science",
|
| 141 |
+
"machine learning",
|
| 142 |
+
"deep learning",
|
| 143 |
+
"ai",
|
| 144 |
+
"analytics",
|
| 145 |
+
"data analyst",
|
| 146 |
+
"ml engineer",
|
| 147 |
+
"research scientist",
|
| 148 |
+
"business intelligence",
|
| 149 |
+
"statistical modeling",
|
| 150 |
+
"predictive analytics",
|
| 151 |
+
"data mining"
|
| 152 |
+
]
|
| 153 |
+
}
|
domains/mechanical_engineering.json
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"domain_id": "mechanical_engineering",
|
| 3 |
+
"display_name": "Mechanical Engineering",
|
| 4 |
+
"description": "Design, Manufacturing, Automotive, and Core Engineering",
|
| 5 |
+
"core_skills": [
|
| 6 |
+
"autocad",
|
| 7 |
+
"solidworks",
|
| 8 |
+
"catia",
|
| 9 |
+
"ansys",
|
| 10 |
+
"matlab",
|
| 11 |
+
"thermodynamics",
|
| 12 |
+
"fluid_mechanics",
|
| 13 |
+
"heat_transfer",
|
| 14 |
+
"manufacturing",
|
| 15 |
+
"cnc",
|
| 16 |
+
"3d_printing",
|
| 17 |
+
"gd_t",
|
| 18 |
+
"fea",
|
| 19 |
+
"cfd",
|
| 20 |
+
"product_design",
|
| 21 |
+
"quality_control"
|
| 22 |
+
],
|
| 23 |
+
"aspect_prototypes": {
|
| 24 |
+
"technical_skills": [
|
| 25 |
+
"designed complex assemblies in SolidWorks and CATIA",
|
| 26 |
+
"performed FEA analysis using ANSYS for structural optimization",
|
| 27 |
+
"created CFD simulations for fluid flow optimization",
|
| 28 |
+
"developed CNC programs for precision machining",
|
| 29 |
+
"implemented GD&T for manufacturing tolerances",
|
| 30 |
+
"designed heat exchangers using thermal analysis",
|
| 31 |
+
"prototyped parts using 3D printing and rapid prototyping",
|
| 32 |
+
"conducted DFMEA for product reliability",
|
| 33 |
+
"created engineering drawings following ASME standards",
|
| 34 |
+
"optimized product design reducing weight by 20%"
|
| 35 |
+
],
|
| 36 |
+
"problem_solving": [
|
| 37 |
+
"resolved vibration issue in rotating machinery",
|
| 38 |
+
"optimized manufacturing process reducing cycle time by 30%",
|
| 39 |
+
"diagnosed failure mode using root cause analysis",
|
| 40 |
+
"redesigned component eliminating stress concentration",
|
| 41 |
+
"improved product yield from 85% to 98% through quality control",
|
| 42 |
+
"solved thermal management problem in electronic enclosure",
|
| 43 |
+
"reduced material waste by 25% through lean manufacturing",
|
| 44 |
+
"fixed tolerance stack-up issue causing assembly problems",
|
| 45 |
+
"automated inspection process using machine vision",
|
| 46 |
+
"designed jig and fixture reducing setup time"
|
| 47 |
+
],
|
| 48 |
+
"leadership": [
|
| 49 |
+
"led BAJA SAE team of 20 members as captain",
|
| 50 |
+
"managed product development project from concept to production",
|
| 51 |
+
"coordinated with suppliers for component sourcing",
|
| 52 |
+
"organized SAE chapter events with 200+ participants",
|
| 53 |
+
"mentored junior designers on CAD and simulation tools",
|
| 54 |
+
"led quality improvement initiative on production floor",
|
| 55 |
+
"managed cross-functional team for product launch",
|
| 56 |
+
"conducted design reviews with stakeholders",
|
| 57 |
+
"led vendor qualification and development program",
|
| 58 |
+
"organized technical workshops on new manufacturing methods"
|
| 59 |
+
],
|
| 60 |
+
"internship_experience": [
|
| 61 |
+
"6 months design intern at Tata Motors in R&D division",
|
| 62 |
+
"summer internship at Mahindra on EV powertrain",
|
| 63 |
+
"manufacturing intern at L&T in heavy engineering",
|
| 64 |
+
"R&D intern at Bosch on automotive components",
|
| 65 |
+
"product design intern at Godrej appliances division",
|
| 66 |
+
"quality engineering intern at Maruti Suzuki",
|
| 67 |
+
"CAE analyst intern at TAFE for tractor design",
|
| 68 |
+
"tool design intern at Hero MotoCorp",
|
| 69 |
+
"thermal analysis intern at Thermax",
|
| 70 |
+
"research intern at IIT Madras on composite materials"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
"industry_benchmarks": {
|
| 74 |
+
"min_employability_score": 0.55,
|
| 75 |
+
"expected_cgpa": 7.0,
|
| 76 |
+
"expected_internship_months": 3,
|
| 77 |
+
"critical_skills": [
|
| 78 |
+
"cad",
|
| 79 |
+
"manufacturing_basics",
|
| 80 |
+
"engineering_drawing"
|
| 81 |
+
],
|
| 82 |
+
"nice_to_have_skills": [
|
| 83 |
+
"fea",
|
| 84 |
+
"cfd",
|
| 85 |
+
"python",
|
| 86 |
+
"automation"
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
"skill_gaps_mapping": {
|
| 90 |
+
"cae_simulation": {
|
| 91 |
+
"demand_score": 0.70,
|
| 92 |
+
"courses": [
|
| 93 |
+
"ANSYS Certification",
|
| 94 |
+
"CATIA V5 Mastery"
|
| 95 |
+
],
|
| 96 |
+
"certifications": [
|
| 97 |
+
"CSWA",
|
| 98 |
+
"CSWP",
|
| 99 |
+
"ANSYS Certified"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
"ev_powertrain": {
|
| 103 |
+
"demand_score": 0.75,
|
| 104 |
+
"courses": [
|
| 105 |
+
"Electric Vehicle Technology",
|
| 106 |
+
"Battery Management Systems"
|
| 107 |
+
],
|
| 108 |
+
"certifications": [
|
| 109 |
+
"EV Design Certification"
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
"automation": {
|
| 113 |
+
"demand_score": 0.65,
|
| 114 |
+
"courses": [
|
| 115 |
+
"Industrial Automation",
|
| 116 |
+
"PLC Programming"
|
| 117 |
+
],
|
| 118 |
+
"certifications": [
|
| 119 |
+
"Siemens TIA Portal",
|
| 120 |
+
"Allen Bradley"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
"industry_4_0": {
|
| 124 |
+
"demand_score": 0.60,
|
| 125 |
+
"courses": [
|
| 126 |
+
"IoT for Manufacturing",
|
| 127 |
+
"Digital Twin Technology"
|
| 128 |
+
],
|
| 129 |
+
"certifications": [
|
| 130 |
+
"Industry 4.0 Certification"
|
| 131 |
+
]
|
| 132 |
+
}
|
| 133 |
+
},
|
| 134 |
+
"detection_keywords": [
|
| 135 |
+
"mechanical",
|
| 136 |
+
"design engineer",
|
| 137 |
+
"manufacturing",
|
| 138 |
+
"automotive",
|
| 139 |
+
"product design",
|
| 140 |
+
"cad",
|
| 141 |
+
"solidworks",
|
| 142 |
+
"catia",
|
| 143 |
+
"ansys",
|
| 144 |
+
"thermodynamics",
|
| 145 |
+
"heat transfer",
|
| 146 |
+
"fluid mechanics"
|
| 147 |
+
]
|
| 148 |
+
}
|
domains/software_engineering.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"domain_id": "software_engineering",
|
| 3 |
+
"display_name": "Software Engineering",
|
| 4 |
+
"description": "Backend, Frontend, Full-stack, DevOps, and general software development",
|
| 5 |
+
"core_skills": [
|
| 6 |
+
"python",
|
| 7 |
+
"java",
|
| 8 |
+
"javascript",
|
| 9 |
+
"typescript",
|
| 10 |
+
"golang",
|
| 11 |
+
"c++",
|
| 12 |
+
"react",
|
| 13 |
+
"nodejs",
|
| 14 |
+
"django",
|
| 15 |
+
"spring",
|
| 16 |
+
"fastapi",
|
| 17 |
+
"flask",
|
| 18 |
+
"sql",
|
| 19 |
+
"postgresql",
|
| 20 |
+
"mongodb",
|
| 21 |
+
"redis",
|
| 22 |
+
"docker",
|
| 23 |
+
"kubernetes",
|
| 24 |
+
"aws",
|
| 25 |
+
"gcp",
|
| 26 |
+
"azure",
|
| 27 |
+
"git",
|
| 28 |
+
"ci_cd",
|
| 29 |
+
"testing",
|
| 30 |
+
"system_design"
|
| 31 |
+
],
|
| 32 |
+
"aspect_prototypes": {
|
| 33 |
+
"technical_skills": [
|
| 34 |
+
"developed RESTful APIs using FastAPI",
|
| 35 |
+
"built microservices architecture with Docker",
|
| 36 |
+
"implemented CI/CD pipelines for automated deployment",
|
| 37 |
+
"designed database schemas for high-traffic applications",
|
| 38 |
+
"wrote production-grade Python code with unit tests",
|
| 39 |
+
"deployed applications to AWS using ECS and Lambda",
|
| 40 |
+
"built React frontend with Redux state management",
|
| 41 |
+
"optimized SQL queries reducing latency by 50%",
|
| 42 |
+
"implemented caching layer with Redis",
|
| 43 |
+
"created data pipelines using Apache Airflow"
|
| 44 |
+
],
|
| 45 |
+
"problem_solving": [
|
| 46 |
+
"debugged production outage affecting 10K users",
|
| 47 |
+
"optimized algorithm complexity from O(n²) to O(n log n)",
|
| 48 |
+
"resolved memory leak in long-running service",
|
| 49 |
+
"diagnosed and fixed race condition in concurrent code",
|
| 50 |
+
"designed fallback strategy for external API failures",
|
| 51 |
+
"reduced P95 latency from 500ms to 100ms",
|
| 52 |
+
"automated manual deployment reducing errors by 80%",
|
| 53 |
+
"created monitoring dashboards to detect issues early",
|
| 54 |
+
"refactored legacy codebase improving maintainability",
|
| 55 |
+
"implemented retry logic with exponential backoff"
|
| 56 |
+
],
|
| 57 |
+
"leadership": [
|
| 58 |
+
"led a team of 5 engineers on product launch",
|
| 59 |
+
"managed sprint planning and backlog prioritization",
|
| 60 |
+
"conducted code reviews for junior developers",
|
| 61 |
+
"organized weekly tech talks for knowledge sharing",
|
| 62 |
+
"coordinated cross-team integration project",
|
| 63 |
+
"mentored 3 interns during summer program",
|
| 64 |
+
"drove architectural decisions for new microservice",
|
| 65 |
+
"led incident response during production outage",
|
| 66 |
+
"facilitated retrospectives improving team velocity",
|
| 67 |
+
"championed adoption of testing best practices"
|
| 68 |
+
],
|
| 69 |
+
"internship_experience": [
|
| 70 |
+
"6 months SWE intern at Google building recommendation systems",
|
| 71 |
+
"summer internship at Microsoft on Azure DevOps team",
|
| 72 |
+
"3 months ML intern at startup developing NLP models",
|
| 73 |
+
"backend engineering intern at Stripe working on payments",
|
| 74 |
+
"full-stack intern at Flipkart building seller dashboard",
|
| 75 |
+
"DevOps intern at Infosys implementing CI/CD",
|
| 76 |
+
"research intern at IIT Bombay on distributed systems",
|
| 77 |
+
"mobile development intern at Zomato for Android app",
|
| 78 |
+
"data engineering intern at Razorpay building pipelines",
|
| 79 |
+
"platform intern at Amazon working on internal tools"
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
"industry_benchmarks": {
|
| 83 |
+
"min_employability_score": 0.60,
|
| 84 |
+
"expected_cgpa": 7.5,
|
| 85 |
+
"expected_internship_months": 4,
|
| 86 |
+
"critical_skills": [
|
| 87 |
+
"python",
|
| 88 |
+
"sql",
|
| 89 |
+
"git",
|
| 90 |
+
"system_design"
|
| 91 |
+
],
|
| 92 |
+
"nice_to_have_skills": [
|
| 93 |
+
"kubernetes",
|
| 94 |
+
"aws",
|
| 95 |
+
"redis",
|
| 96 |
+
"graphql"
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
"skill_gaps_mapping": {
|
| 100 |
+
"cloud": {
|
| 101 |
+
"demand_score": 0.85,
|
| 102 |
+
"courses": [
|
| 103 |
+
"AWS Solutions Architect",
|
| 104 |
+
"GCP Professional",
|
| 105 |
+
"Azure Fundamentals"
|
| 106 |
+
],
|
| 107 |
+
"certifications": [
|
| 108 |
+
"AWS SAA",
|
| 109 |
+
"GCP ACE",
|
| 110 |
+
"Azure AZ-900"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
"devops": {
|
| 114 |
+
"demand_score": 0.80,
|
| 115 |
+
"courses": [
|
| 116 |
+
"Docker Mastery",
|
| 117 |
+
"Kubernetes for Developers",
|
| 118 |
+
"CI/CD with GitHub Actions"
|
| 119 |
+
],
|
| 120 |
+
"certifications": [
|
| 121 |
+
"CKA",
|
| 122 |
+
"Docker DCA",
|
| 123 |
+
"Jenkins Certified"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
"system_design": {
|
| 127 |
+
"demand_score": 0.75,
|
| 128 |
+
"courses": [
|
| 129 |
+
"Grokking System Design",
|
| 130 |
+
"Designing Data-Intensive Applications"
|
| 131 |
+
],
|
| 132 |
+
"certifications": []
|
| 133 |
+
},
|
| 134 |
+
"dsa": {
|
| 135 |
+
"demand_score": 0.70,
|
| 136 |
+
"courses": [
|
| 137 |
+
"LeetCode Patterns",
|
| 138 |
+
"AlgoExpert",
|
| 139 |
+
"NeetCode 150"
|
| 140 |
+
],
|
| 141 |
+
"certifications": []
|
| 142 |
+
}
|
| 143 |
+
},
|
| 144 |
+
"detection_keywords": [
|
| 145 |
+
"software",
|
| 146 |
+
"developer",
|
| 147 |
+
"engineer",
|
| 148 |
+
"backend",
|
| 149 |
+
"frontend",
|
| 150 |
+
"fullstack",
|
| 151 |
+
"web development",
|
| 152 |
+
"api",
|
| 153 |
+
"microservice",
|
| 154 |
+
"devops",
|
| 155 |
+
"sre",
|
| 156 |
+
"platform",
|
| 157 |
+
"coding",
|
| 158 |
+
"programming",
|
| 159 |
+
"python developer",
|
| 160 |
+
"java developer"
|
| 161 |
+
]
|
| 162 |
+
}
|
models/personality_responses.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Personality responses model"""
|
| 2 |
+
from typing import Dict, List
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
@dataclass
class PersonalityResponses:
    """A student's answers to the 20-item Big Five personality questionnaire.

    Attributes:
        student_id: Identifier of the responding student.
        responses: Mapping of question id ("p_q1".."p_q20") to a 1-5 Likert rating.
    """
    student_id: str
    responses: Dict[str, int]  # p_q1..p_q20 -> 1-5 Likert score

    def to_dict(self):
        """Flatten the record into a single dict suitable for DB insertion."""
        return {'student_id': self.student_id, **self.responses}

    @staticmethod
    def get_questions() -> List[Dict[str, str]]:
        """Return 20 curated personality questions mapped to Big Five traits"""
        # (id, prompt, trait) triples; a "_r" trait suffix marks reverse-scored items.
        bank = [
            # Openness (4 questions)
            ("p_q1", "I enjoy exploring new ideas and concepts", "openness"),
            ("p_q2", "I prefer routine over spontaneity", "openness_r"),
            ("p_q3", "I am curious about many different things", "openness"),
            ("p_q4", "I appreciate art and creative expression", "openness"),
            # Conscientiousness (4 questions)
            ("p_q5", "I am highly organized and plan ahead", "conscientiousness"),
            ("p_q6", "I often procrastinate on tasks", "conscientiousness_r"),
            ("p_q7", "I pay attention to details", "conscientiousness"),
            ("p_q8", "I complete tasks on time", "conscientiousness"),
            # Extraversion (4 questions)
            ("p_q9", "I enjoy being the center of attention", "extraversion"),
            ("p_q10", "I prefer working alone", "extraversion_r"),
            ("p_q11", "I make friends easily", "extraversion"),
            ("p_q12", "I am energized by social interactions", "extraversion"),
            # Agreeableness (4 questions)
            ("p_q13", "I am considerate of others' feelings", "agreeableness"),
            ("p_q14", "I prefer competition over collaboration", "agreeableness_r"),
            ("p_q15", "I trust people easily", "agreeableness"),
            ("p_q16", "I help others when they need it", "agreeableness"),
            # Emotional Stability (4 questions)
            ("p_q17", "I handle stress well", "stability"),
            ("p_q18", "I often feel anxious", "stability_r"),
            ("p_q19", "I remain calm under pressure", "stability"),
            ("p_q20", "I recover quickly from setbacks", "stability"),
        ]
        return [{"id": qid, "text": text, "trait": trait} for qid, text, trait in bank]
|
models/student.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Student data model"""
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
@dataclass
class Student:
    """Academic profile of a single student.

    Carries the overall CGPA, optional per-semester SGPA values, school
    percentages, and free-text descriptions of extracurriculars,
    certifications and internships.
    """
    student_id: str
    user_id: Optional[str]  # linked auth user, if any
    cgpa: float
    sgpa_sem1: Optional[float]
    sgpa_sem2: Optional[float]
    sgpa_sem3: Optional[float]
    sgpa_sem4: Optional[float]
    sgpa_sem5: Optional[float]
    sgpa_sem6: Optional[float]
    sgpa_sem7: Optional[float]
    sgpa_sem8: Optional[float]
    tenth_pct: Optional[float]
    twelfth_pct: Optional[float]
    extracurricular_text: Optional[str] = None
    certifications_text: Optional[str] = None
    internship_text: Optional[str] = None

    def to_dict(self):
        """Serialize every field into a plain dict (keys match the DB columns)."""
        # Column names listed in declaration order so the output matches the schema.
        columns = (
            'student_id', 'user_id', 'cgpa',
            'sgpa_sem1', 'sgpa_sem2', 'sgpa_sem3', 'sgpa_sem4',
            'sgpa_sem5', 'sgpa_sem6', 'sgpa_sem7', 'sgpa_sem8',
            'tenth_pct', 'twelfth_pct',
            'extracurricular_text', 'certifications_text', 'internship_text',
        )
        return {name: getattr(self, name) for name in columns}
|
models/text_responses.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Text responses model"""
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
@dataclass
class TextResponses:
    """A student's three free-text questionnaire answers."""
    student_id: str
    text_q1: str  # Strengths
    text_q2: str  # Career interests
    text_q3: str  # Extracurriculars + leadership

    def to_dict(self):
        """Serialize into a plain dict keyed by the DB column names."""
        names = ('student_id', 'text_q1', 'text_q2', 'text_q3')
        return {name: getattr(self, name) for name in names}

    @staticmethod
    def get_questions() -> List[Dict[str, str]]:
        """Return the 3 textual questions"""
        # (id, question text, input placeholder) for each free-text prompt.
        prompts = [
            ("text_q1",
             "What are your key strengths and technical skills? (150-300 words)",
             "Describe your technical skills, soft skills, and what makes you stand out..."),
            ("text_q2",
             "What are your career interests and goals? (150-300 words)",
             "Describe your ideal career path, industries of interest, and long-term goals..."),
            ("text_q3",
             "Describe your extracurricular activities and leadership experiences. (150-300 words)",
             "Share your involvement in clubs, projects, leadership roles, and impact..."),
        ]
        return [
            {"id": qid, "text": text, "placeholder": hint}
            for qid, text, hint in prompts
        ]
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==3.0.0
|
| 2 |
+
Flask-CORS==4.0.0
|
| 3 |
+
supabase==2.9.0
|
| 4 |
+
websockets>=15.0.1
|
| 5 |
+
sentence-transformers>=2.2.0
|
| 6 |
+
numpy>=1.24.0
|
| 7 |
+
pandas>=2.0.0
|
| 8 |
+
scikit-learn>=1.3.0
|
| 9 |
+
python-dotenv==1.0.0
|
| 10 |
+
gunicorn==21.2.0
|
| 11 |
+
torch>=2.0.0
|
| 12 |
+
transformers>=4.30.0
|
routes/domain.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Domain Evidence Routes
|
| 2 |
+
|
| 3 |
+
API endpoints for submitting and managing domain-specific evidence
|
| 4 |
+
"""
|
| 5 |
+
from flask import Blueprint, request, jsonify
|
| 6 |
+
import logging
|
| 7 |
+
from database.db import get_db
|
| 8 |
+
from services.domain_plugins import DomainPluginFactory
|
| 9 |
+
from services.domain_plugins.tech_plugin import TechPlugin
|
| 10 |
+
from services.domain_plugins.business_plugin import BusinessPlugin
|
| 11 |
+
from services.domain_plugins.creative_plugin import CreativePlugin
|
| 12 |
+
from services.domain_plugins.research_plugin import ResearchPlugin
|
| 13 |
+
|
| 14 |
+
domain_bp = Blueprint('domain', __name__)  # registered by the app factory
db = get_db()  # shared database client, created once at import time
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@domain_bp.route('/domain/available', methods=['GET'])
def list_available_domains():
    """List all available domain plugins"""
    try:
        # Collect per-domain metadata, dropping any domain without info.
        details = [
            info
            for info in (
                DomainPluginFactory.get_domain_info(domain)
                for domain in DomainPluginFactory.list_available_domains()
            )
            if info
        ]
        return jsonify({'success': True, 'domains': details}), 200
    except Exception as e:
        logger.error(f"Error listing domains: {e}")
        return jsonify({'error': str(e)}), 500
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@domain_bp.route('/students/<student_id>/domain-evidence', methods=['POST'])
def submit_domain_evidence(student_id):
    """Submit domain-specific evidence for scoring.

    Validates the requested domain plugin and its inputs, scores the
    evidence, persists the evidence record, caches the score on the
    student row, and returns the computed domain score.

    Returns 400 on invalid domain/inputs, 404 if the student is unknown,
    201 with the score on success, 500 on unexpected failure.
    """
    # Initialize before the try block so the error handler can safely
    # reference these even when parsing the request itself fails
    # (previously this raised NameError inside the except clause).
    domain_type = None
    evidence_data = {}
    try:
        data = request.json or {}
        domain_type = data.get('domain_type')
        evidence_data = data.get('evidence_data', {})

        # Validate domain type
        if not DomainPluginFactory.is_domain_available(domain_type):
            return jsonify({
                'error': f'Invalid domain type: {domain_type}',
                'available_domains': DomainPluginFactory.list_available_domains()
            }), 400

        # Get plugin and validate its domain-specific inputs
        plugin = DomainPluginFactory.get_plugin(domain_type)
        is_valid, error_msg = plugin.validate_inputs(evidence_data)
        if not is_valid:
            return jsonify({'error': error_msg}), 400

        # Check if student exists
        student_check = db.table('analytics_students').select('student_id').eq('student_id', student_id).execute()
        if not student_check.data:
            return jsonify({'error': 'Student not found'}), 404

        # Score the evidence
        logger.info(f"Scoring {domain_type} evidence for student {student_id}")
        domain_score = plugin.score(evidence_data)

        # Store evidence and score
        evidence_record = {
            'student_id': student_id,
            'domain_type': domain_type,
            'evidence_data': evidence_data,
            'domain_score': domain_score.score,
            'domain_confidence': domain_score.confidence,
            'raw_features': domain_score.raw_features,
            'processing_status': 'completed'
        }
        db.table('analytics_domain_evidence').upsert(evidence_record).execute()

        # Cache the active domain + score on the student row
        student_update = {
            'student_id': student_id,
            'active_domain': domain_type,
            'domain_score': domain_score.score,
            'domain_confidence': domain_score.confidence
        }
        db.table('analytics_students').upsert(student_update).execute()

        logger.info(f"Domain evidence submitted successfully: {domain_type} score = {domain_score.score:.3f}")

        return jsonify({
            'success': True,
            'domain_score': domain_score.to_dict(),
            'message': f'{domain_type.capitalize()} domain evidence processed successfully'
        }), 201

    except Exception as e:
        # logger.exception records the stack trace, replacing the previous
        # traceback.print_exc() to stdout.
        logger.exception(f"Error submitting domain evidence: {e}")

        # Best-effort: record the failure for later inspection.
        try:
            error_record = {
                'student_id': student_id,
                'domain_type': domain_type,
                'evidence_data': evidence_data,
                'processing_status': 'failed',
                'error_message': str(e)
            }
            db.table('analytics_domain_evidence').upsert(error_record).execute()
        except Exception:
            # Never let failure-logging mask the original error.
            pass

        return jsonify({'error': str(e)}), 500
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@domain_bp.route('/students/<student_id>/domain-evidence', methods=['GET'])
def get_domain_evidence(student_id):
    """Get domain evidence for a student"""
    try:
        rows = db.table('analytics_domain_evidence').select('*').eq('student_id', student_id).execute()

        # Empty result is not an error: respond with an empty list plus a hint.
        payload = {'success': True, 'evidence': rows.data or []}
        if not rows.data:
            payload['message'] = 'No domain evidence found'
        return jsonify(payload), 200

    except Exception as e:
        logger.error(f"Error retrieving domain evidence: {e}")
        return jsonify({'error': str(e)}), 500
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
@domain_bp.route('/students/<student_id>/domain-evidence/<domain_type>', methods=['GET'])
def get_specific_domain_evidence(student_id, domain_type):
    """Get specific domain evidence for a student"""
    try:
        query = db.table('analytics_domain_evidence').select('*')
        query = query.eq('student_id', student_id).eq('domain_type', domain_type)
        result = query.execute()

        if not result.data:
            return jsonify({
                'error': f'No {domain_type} evidence found for student {student_id}'
            }), 404

        # (student_id, domain_type) is expected to be unique, so return the
        # first (only) row.
        return jsonify({'success': True, 'evidence': result.data[0]}), 200

    except Exception as e:
        logger.error(f"Error retrieving domain evidence: {e}")
        return jsonify({'error': str(e)}), 500
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
@domain_bp.route('/students/<student_id>/domain-evidence/<domain_type>', methods=['DELETE'])
def delete_domain_evidence(student_id, domain_type):
    """Delete domain evidence for a student"""
    try:
        # Remove the evidence row for this (student, domain) pair.
        db.table('analytics_domain_evidence')\
            .delete()\
            .eq('student_id', student_id)\
            .eq('domain_type', domain_type)\
            .execute()

        # If the deleted domain was the student's active one, clear the
        # cached score fields so stale values are not reused.
        student = db.table('analytics_students').select('active_domain').eq('student_id', student_id).execute()
        active = student.data[0].get('active_domain') if student.data else None
        if active == domain_type:
            cleared = {
                'active_domain': None,
                'domain_score': None,
                'domain_confidence': None
            }
            db.table('analytics_students').update(cleared).eq('student_id', student_id).execute()

        return jsonify({
            'success': True,
            'message': f'{domain_type.capitalize()} evidence deleted'
        }), 200

    except Exception as e:
        logger.error(f"Error deleting domain evidence: {e}")
        return jsonify({'error': str(e)}), 500
|
routes/scoring.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Scoring routes"""
|
| 2 |
+
from flask import Blueprint, jsonify
|
| 3 |
+
from database.db import get_db
|
| 4 |
+
from services.universal_module import UniversalModule
|
| 5 |
+
from services.personality_module import PersonalityModule
|
| 6 |
+
from services.text_module import TextModule
|
| 7 |
+
from services.fusion import FusionEngine
|
| 8 |
+
|
| 9 |
+
scoring_bp = Blueprint('scoring', __name__)  # registered by the app factory
db = get_db()  # shared database client

# Initialize modules once at import time so any models/resources they load
# are created a single time and reused across requests.
universal_module = UniversalModule()
personality_module = PersonalityModule()
text_module = TextModule()
fusion_engine = FusionEngine()
|
| 18 |
+
|
| 19 |
+
@scoring_bp.route('/score/<student_id>', methods=['GET'])
def get_student_score(student_id):
    """
    Compute and return full scoring packet for a student

    Combines four evidence sources — universal (academics), personality,
    free-text answers, and optional domain evidence — into a fused final
    score with grade, percentile, per-module explanations and confidences.
    """
    try:
        # 1. Fetch student data
        # NOTE(review): supabase .single() may raise instead of returning
        # empty data when no row matches — confirm the 404 path is reachable.
        student_result = db.table('analytics_students').select('*').eq('student_id', student_id).single().execute()
        if not student_result.data:
            return jsonify({'error': 'Student not found'}), 404

        student_data = student_result.data

        # 2. Fetch personality responses (answer columns p_q1..p_q20)
        personality_result = db.table('analytics_personality_responses').select('*').eq('student_id', student_id).maybe_single().execute()
        personality_responses = {}
        if personality_result.data:
            # Keep only the questionnaire answer columns, dropping metadata.
            personality_responses = {k: v for k, v in personality_result.data.items() if k.startswith('p_q')}

        # 3. Fetch text responses (missing answers default to empty strings)
        text_result = db.table('analytics_text_responses').select('*').eq('student_id', student_id).maybe_single().execute()
        text_responses = {}
        if text_result.data:
            text_responses = {
                'text_q1': text_result.data.get('text_q1', ''),
                'text_q2': text_result.data.get('text_q2', ''),
                'text_q3': text_result.data.get('text_q3', '')
            }

        # 3.5. Fetch domain evidence (if exists) — the score/confidence are
        # cached on the student row; raw features come from the evidence table.
        domain_score = None
        domain_confidence = None
        domain_type = None
        domain_features = {}

        if student_data.get('active_domain'):
            domain_type = student_data.get('active_domain')
            domain_score = student_data.get('domain_score')
            domain_confidence = student_data.get('domain_confidence')

            # Fetch detailed domain evidence for feature-level explanations.
            # NOTE(review): maybe_single() may return None (not a result
            # object) in some client versions — confirm .data access is safe.
            domain_result = db.table('analytics_domain_evidence')\
                .select('*')\
                .eq('student_id', student_id)\
                .eq('domain_type', domain_type)\
                .maybe_single()\
                .execute()

            if domain_result.data:
                domain_features = domain_result.data.get('raw_features', {})

        # 4. Calculate universal score
        universal_score, universal_confidence, universal_features = universal_module.score(student_data)
        universal_explanations = universal_module.explain(universal_features)

        # 5. Calculate personality score
        personality_score, personality_confidence, personality_traits = personality_module.score(personality_responses)
        personality_explanations = personality_module.explain(personality_traits)

        # 6. Calculate text score
        text_score, text_confidence, text_features = text_module.score(text_responses)
        text_explanations = text_module.explain(text_features)

        # 7. Fuse scores (with optional domain score; None is accepted when
        # no domain evidence exists)
        final_score, breakdown = fusion_engine.fuse_scores(
            universal_score, universal_confidence,
            personality_score, personality_confidence,
            text_score, text_confidence,
            domain_score, domain_confidence
        )

        # 8. Get grade and percentile
        grade = fusion_engine.get_grade(final_score)
        percentile = fusion_engine.get_percentile(final_score)

        # 9. Prepare response
        response = {
            'student_id': student_id,
            'final_score': round(final_score, 4),
            'grade': grade,
            'percentile': percentile,
            'scores': breakdown,
            'explanations': {
                'universal': universal_explanations,
                'personality': personality_explanations,
                'text': text_explanations
            },
            'detailed_features': {
                # Rounded to 3 decimals for a compact API payload
                'universal': {k: round(v, 3) for k, v in universal_features.items()},
                'personality': {k: round(v, 3) for k, v in personality_traits.items()},
                'text': {k: round(v, 3) for k, v in text_features.items()}
            },
            'data_completeness': {
                'universal': universal_confidence,
                'personality': personality_confidence,
                'text': text_confidence
            }
        }

        # Add domain information if present
        if domain_type:
            response['domain_type'] = domain_type
            response['detailed_features']['domain'] = {k: round(v, 3) for k, v in domain_features.items()}
            response['data_completeness']['domain'] = domain_confidence
            response['explanations']['domain'] = {
                'message': f'{domain_type.capitalize()} domain evidence provided',
                'features': domain_features
            }
        else:
            response['domain_type'] = None
            response['explanations']['domain'] = {
                'message': 'No domain-specific evidence submitted. Submit GitHub/portfolio/resume for enhanced scoring.'
            }

        return jsonify({
            'success': True,
            'data': response
        }), 200

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
@scoring_bp.route('/leaderboard', methods=['GET'])
def get_leaderboard():
    """
    Get top students by score (mock for MVP)
    In production, this would compute and cache scores

    Fetches up to 10 students, recomputes each one's fused score from the
    universal/personality/text modules, and returns them sorted descending
    by final_score.
    """
    try:
        # Fetch all students
        students = db.table('analytics_students').select('*').execute()

        leaderboard = []
        for student in students.data[:10]:  # Limit to 10 for MVP
            try:
                # Quick score calculation
                student_id = student['student_id']

                # Get personality answers (p_q1..p_q20 columns only)
                personality_result = db.table('analytics_personality_responses').select('*').eq('student_id', student_id).maybe_single().execute()
                personality_responses = {}
                if personality_result.data:
                    personality_responses = {k: v for k, v in personality_result.data.items() if k.startswith('p_q')}

                # Get text answers (missing ones default to empty strings)
                text_result = db.table('analytics_text_responses').select('*').eq('student_id', student_id).maybe_single().execute()
                text_responses = {}
                if text_result.data:
                    text_responses = {
                        'text_q1': text_result.data.get('text_q1', ''),
                        'text_q2': text_result.data.get('text_q2', ''),
                        'text_q3': text_result.data.get('text_q3', '')
                    }

                # Calculate scores
                universal_score, universal_conf, _ = universal_module.score(student)
                personality_score, personality_conf, _ = personality_module.score(personality_responses)
                text_score, text_conf, _ = text_module.score(text_responses)

                final_score, _ = fusion_engine.fuse_scores(
                    universal_score, universal_conf,
                    personality_score, personality_conf,
                    text_score, text_conf
                )

                leaderboard.append({
                    'student_id': student_id,
                    'final_score': round(final_score, 3),
                    'grade': fusion_engine.get_grade(final_score)
                })
            except Exception:
                # Skip students whose data is incomplete or fails scoring.
                # Narrowed from a bare `except:` which also swallowed
                # SystemExit/KeyboardInterrupt.
                continue

        # Sort by score, highest first
        leaderboard.sort(key=lambda x: x['final_score'], reverse=True)

        return jsonify({
            'success': True,
            'data': leaderboard
        }), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
routes/students.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Student management routes"""
|
| 2 |
+
from flask import Blueprint, request, jsonify
|
| 3 |
+
from database.db import get_db
|
| 4 |
+
from models.student import Student
|
| 5 |
+
|
| 6 |
+
students_bp = Blueprint('students', __name__)
|
| 7 |
+
db = get_db()
|
| 8 |
+
|
| 9 |
+
@students_bp.route('/students', methods=['POST'])
def create_student():
    """Create a new student profile.

    Expects JSON with required keys 'student_id' and 'cgpa'. All other
    academic fields (per-semester SGPA, school percentages, free-text
    profile fields) are optional and default to None. Performs an upsert,
    so re-posting the same student_id updates the existing row.
    """
    try:
        data = request.json
        print(f"Received data: {data}")  # Debug log

        # Validate required fields
        required = ['student_id', 'cgpa']
        if not all(k in data for k in required):
            return jsonify({'error': 'Missing required fields'}), 400

        # Create student record; .get() leaves absent optional fields as None
        student = Student(
            student_id=data['student_id'],
            user_id=data.get('user_id'),
            cgpa=data['cgpa'],
            sgpa_sem1=data.get('sgpa_sem1'),
            sgpa_sem2=data.get('sgpa_sem2'),
            sgpa_sem3=data.get('sgpa_sem3'),
            sgpa_sem4=data.get('sgpa_sem4'),
            sgpa_sem5=data.get('sgpa_sem5'),
            sgpa_sem6=data.get('sgpa_sem6'),
            sgpa_sem7=data.get('sgpa_sem7'),
            sgpa_sem8=data.get('sgpa_sem8'),
            tenth_pct=data.get('tenth_pct'),
            twelfth_pct=data.get('twelfth_pct'),
            extracurricular_text=data.get('extracurricular_text'),
            certifications_text=data.get('certifications_text'),
            internship_text=data.get('internship_text')
        )

        print(f"Student object created: {student.to_dict()}")  # Debug log

        # Insert or update into database (upsert)
        result = db.table('analytics_students').upsert(student.to_dict()).execute()

        print(f"Database result: {result}")  # Debug log

        return jsonify({
            'success': True,
            'data': result.data[0] if result.data else None
        }), 201

    except Exception as e:
        print(f"Error in create_student: {str(e)}")  # Debug log
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@students_bp.route('/students', methods=['GET'])
def list_students():
    """List all students"""
    try:
        rows = db.table('analytics_students').select('*').execute()
        return jsonify({'success': True, 'data': rows.data}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@students_bp.route('/students/<student_id>', methods=['GET'])
def get_student(student_id):
    """Get a specific student"""
    try:
        # NOTE(review): supabase .single() may raise instead of returning
        # empty data when no row matches — confirm the 404 path is reachable.
        row = db.table('analytics_students').select('*').eq('student_id', student_id).single().execute()

        if not row.data:
            return jsonify({'error': 'Student not found'}), 404

        return jsonify({'success': True, 'data': row.data}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
@students_bp.route('/personality/<student_id>', methods=['POST'])
def submit_personality(student_id):
    """Submit personality responses for a student"""
    try:
        data = request.json

        # Validate student exists
        student = db.table('analytics_students').select('student_id').eq('student_id', student_id).single().execute()
        if not student.data:
            return jsonify({'error': 'Student not found'}), 404

        # Collect answered questions p_q1..p_q20; unanswered keys are omitted.
        answer_keys = (f'p_q{i}' for i in range(1, 21))
        personality_data = {'student_id': student_id}
        personality_data.update({k: data[k] for k in answer_keys if k in data})

        # Insert or update
        result = db.table('analytics_personality_responses').upsert(personality_data).execute()

        return jsonify({
            'success': True,
            'data': result.data[0] if result.data else None
        }), 201

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
@students_bp.route('/text/<student_id>', methods=['POST'])
def submit_text(student_id):
    """Submit text responses for a student"""
    try:
        data = request.json

        # Validate student exists
        student = db.table('analytics_students').select('student_id').eq('student_id', student_id).single().execute()
        if not student.data:
            return jsonify({'error': 'Student not found'}), 404

        # Validate required text fields — all three answers are mandatory.
        required = ('text_q1', 'text_q2', 'text_q3')
        if any(k not in data for k in required):
            return jsonify({'error': 'Missing required text fields'}), 400

        # Build the upsert payload from the three answers
        text_data = {k: data[k] for k in required}
        text_data['student_id'] = student_id

        # Insert or update
        result = db.table('analytics_text_responses').upsert(text_data).execute()

        return jsonify({
            'success': True,
            'data': result.data[0] if result.data else None
        }), 201

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
@students_bp.route('/questions/personality', methods=['GET'])
def get_personality_questions():
    """Get the 20 personality questions"""
    # Imported lazily to avoid loading the model module at blueprint import.
    from models.personality_responses import PersonalityResponses
    payload = {'success': True, 'data': PersonalityResponses.get_questions()}
    return jsonify(payload), 200
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@students_bp.route('/questions/text', methods=['GET'])
def get_text_questions():
    """Get the 3 text questions"""
    # Imported lazily to avoid loading the model module at blueprint import.
    from models.text_responses import TextResponses
    payload = {'success': True, 'data': TextResponses.get_questions()}
    return jsonify(payload), 200
|
services/README_text_v2.md
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Text Module V2 - Aspect-Based Scoring
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
Enhanced text analysis using prototype-based aspect extraction with `all-mpnet-base-v2` embeddings.
|
| 5 |
+
|
| 6 |
+
## Changes from V1
|
| 7 |
+
- **Model**: Upgraded from `all-MiniLM-L6-v2` (384d) to `all-mpnet-base-v2` (768d)
|
| 8 |
+
- **Approach**: Moved from simple reference embeddings to aspect-based prototype scoring
|
| 9 |
+
- **Aspects**: 10 employability aspects (leadership, technical_skills, problem_solving, etc.)
|
| 10 |
+
- **Admin**: Runtime seed updates via REST API
|
| 11 |
+
|
| 12 |
+
## Configuration
|
| 13 |
+
|
| 14 |
+
### Model Selection
|
| 15 |
+
Set via environment variable or constructor:
|
| 16 |
+
```bash
|
| 17 |
+
export ASPECT_MODEL_NAME=all-mpnet-base-v2 # default
|
| 18 |
+
# or
|
| 19 |
+
export ASPECT_MODEL_NAME=all-MiniLM-L6-v2 # fallback
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
```python
|
| 23 |
+
from services.text_module_v2 import TextModuleV2
|
| 24 |
+
|
| 25 |
+
# Default (all-mpnet-base-v2)
|
| 26 |
+
text_module = TextModuleV2()
|
| 27 |
+
|
| 28 |
+
# Override model
|
| 29 |
+
text_module = TextModuleV2(model_name='all-MiniLM-L6-v2')
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### Aspect Seeds
|
| 33 |
+
Seeds loaded from `./aspect_seeds.json` (created by default). Edit this file to customize aspect definitions.
|
| 34 |
+
|
| 35 |
+
**Location**: `analytics/backend/aspect_seeds.json`
|
| 36 |
+
|
| 37 |
+
### Centroids Cache
|
| 38 |
+
Pre-computed centroids saved to `./aspect_centroids.npz` for fast cold starts.
|
| 39 |
+
|
| 40 |
+
## Usage
|
| 41 |
+
|
| 42 |
+
### Basic Scoring
|
| 43 |
+
```python
|
| 44 |
+
text_module = TextModuleV2()
|
| 45 |
+
|
| 46 |
+
text_responses = {
|
| 47 |
+
'text_q1': "I developed ML pipelines using Python and scikit-learn...",
|
| 48 |
+
'text_q2': "My career goal is to become a data scientist...",
|
| 49 |
+
'text_q3': "I led a team of 5 students in a hackathon project..."
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
score, confidence, features = text_module.score(text_responses)
|
| 53 |
+
|
| 54 |
+
print(f"Score: {score:.2f}, Confidence: {confidence:.2f}")
|
| 55 |
+
print(f"Features: {features}")
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
### Get Current Seeds
|
| 59 |
+
```python
|
| 60 |
+
seeds = text_module.get_aspect_seeds()
|
| 61 |
+
print(f"Loaded {len(seeds)} aspects")
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Admin API
|
| 65 |
+
|
| 66 |
+
### Setup
|
| 67 |
+
```python
|
| 68 |
+
from flask import Flask
|
| 69 |
+
from services.text_module_v2 import TextModuleV2, register_admin_seed_endpoint
|
| 70 |
+
|
| 71 |
+
app = Flask(__name__)
|
| 72 |
+
text_module = TextModuleV2()
|
| 73 |
+
|
| 74 |
+
# Register admin endpoints
|
| 75 |
+
register_admin_seed_endpoint(app, text_module)
|
| 76 |
+
|
| 77 |
+
app.run(port=5001)
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Set admin token:
|
| 81 |
+
```bash
|
| 82 |
+
export ADMIN_SEED_TOKEN=your-secret-token
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
### Endpoints
|
| 86 |
+
|
| 87 |
+
#### GET /admin/aspect-seeds
|
| 88 |
+
Get current loaded seeds.
|
| 89 |
+
|
| 90 |
+
**Request**:
|
| 91 |
+
```bash
|
| 92 |
+
curl -H "X-Admin-Token: your-secret-token" \
|
| 93 |
+
http://localhost:5001/admin/aspect-seeds
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
**Response**:
|
| 97 |
+
```json
|
| 98 |
+
{
|
| 99 |
+
"success": true,
|
| 100 |
+
"seeds": {
|
| 101 |
+
"leadership": ["led a team", "managed project", ...],
|
| 102 |
+
"technical_skills": [...]
|
| 103 |
+
},
|
| 104 |
+
"num_aspects": 10
|
| 105 |
+
}
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
#### POST /admin/aspect-seeds
|
| 109 |
+
Update aspect seeds (recomputes centroids).
|
| 110 |
+
|
| 111 |
+
**Request**:
|
| 112 |
+
```bash
|
| 113 |
+
curl -X POST \
|
| 114 |
+
-H "X-Admin-Token: your-secret-token" \
|
| 115 |
+
-H "Content-Type: application/json" \
|
| 116 |
+
-d '{
|
| 117 |
+
"seeds": {
|
| 118 |
+
"leadership": [
|
| 119 |
+
"led a team",
|
| 120 |
+
"managed stakeholders",
|
| 121 |
+
"organized events"
|
| 122 |
+
],
|
| 123 |
+
"technical_skills": [
|
| 124 |
+
"developed web API",
|
| 125 |
+
"built ML models"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
"persist": true
|
| 129 |
+
}' \
|
| 130 |
+
http://localhost:5001/admin/aspect-seeds
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
**Response**:
|
| 134 |
+
```json
|
| 135 |
+
{
|
| 136 |
+
"success": true,
|
| 137 |
+
"message": "Aspect seeds updated successfully",
|
| 138 |
+
"stats": {
|
| 139 |
+
"num_aspects": 2,
|
| 140 |
+
"avg_seed_count": 2.5,
|
| 141 |
+
"timestamp": "2025-12-09T10:30:00Z"
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
## Advanced: Seed Expansion
|
| 147 |
+
|
| 148 |
+
Suggest new seed phrases from a corpus:
|
| 149 |
+
|
| 150 |
+
```python
|
| 151 |
+
corpus = [
|
| 152 |
+
"I led the product development team and managed stakeholders",
|
| 153 |
+
"Implemented CI/CD pipelines for automated testing",
|
| 154 |
+
# ... more texts
|
| 155 |
+
]
|
| 156 |
+
|
| 157 |
+
suggestions = text_module.suggest_seed_expansions(
|
| 158 |
+
corpus_texts=corpus,
|
| 159 |
+
aspect_key='leadership',
|
| 160 |
+
top_n=20
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
print("Suggested seeds:", suggestions)
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
## Aspect → Question Mapping
|
| 167 |
+
|
| 168 |
+
```python
|
| 169 |
+
from services.text_module_v2 import get_relevant_aspects_for_question
|
| 170 |
+
|
| 171 |
+
# Q1: Strengths & skills
|
| 172 |
+
aspects_q1 = get_relevant_aspects_for_question('text_q1')
|
| 173 |
+
# ['technical_skills', 'problem_solving', 'learning_agility', 'initiative', 'communication']
|
| 174 |
+
|
| 175 |
+
# Q2: Career interests
|
| 176 |
+
aspects_q2 = get_relevant_aspects_for_question('text_q2')
|
| 177 |
+
# ['career_alignment', 'learning_agility', 'initiative', 'communication']
|
| 178 |
+
|
| 179 |
+
# Q3: Extracurriculars & leadership
|
| 180 |
+
aspects_q3 = get_relevant_aspects_for_question('text_q3')
|
| 181 |
+
# ['leadership', 'teamwork', 'project_execution', 'internships_experience', 'communication']
|
| 182 |
+
```
|
| 183 |
+
|
| 184 |
+
## Files
|
| 185 |
+
|
| 186 |
+
| File | Purpose |
|
| 187 |
+
|------|---------|
|
| 188 |
+
| `services/text_module_v2.py` | Main module implementation |
|
| 189 |
+
| `aspect_seeds.json` | Aspect seed definitions (editable) |
|
| 190 |
+
| `aspect_centroids.npz` | Cached centroids (auto-generated) |
|
| 191 |
+
|
| 192 |
+
## Performance
|
| 193 |
+
|
| 194 |
+
- **Model Load**: ~3s (first time)
|
| 195 |
+
- **Centroid Build**: ~1s for 10 aspects with 20 seeds each
|
| 196 |
+
- **Text Scoring**: ~200-500ms per 3-question set (CPU)
|
| 197 |
+
|
| 198 |
+
## Logging
|
| 199 |
+
|
| 200 |
+
Module logs to Python's `logging` system:
|
| 201 |
+
```python
|
| 202 |
+
import logging
|
| 203 |
+
logging.basicConfig(level=logging.INFO)
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
Key events logged:
|
| 207 |
+
- Model loading
|
| 208 |
+
- Seed updates (with masked token)
|
| 209 |
+
- Centroid recomputation
|
| 210 |
+
- File I/O operations
|
services/batch_aggregation.py
ADDED
|
@@ -0,0 +1,414 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Batch Aggregation Service - College-level macro analysis
|
| 3 |
+
Aggregates individual student scores into batch-level reports
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
import numpy as np
|
| 7 |
+
from typing import Dict, List, Any, Optional
|
| 8 |
+
from dataclasses import dataclass, asdict
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from collections import Counter
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
|
| 16 |
+
class AggregateMetrics:
|
| 17 |
+
"""Batch-level aggregate metrics"""
|
| 18 |
+
total_students: int
|
| 19 |
+
avg_employability_score: float
|
| 20 |
+
median_score: float
|
| 21 |
+
std_dev: float
|
| 22 |
+
placement_ready_pct: float # % with score >= 0.6
|
| 23 |
+
skill_diversity_index: int # Unique skills count
|
| 24 |
+
avg_cgpa: float
|
| 25 |
+
avg_internship_months: float
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@dataclass
|
| 29 |
+
class AspectDistribution:
|
| 30 |
+
"""Distribution stats for an aspect"""
|
| 31 |
+
aspect: str
|
| 32 |
+
avg: float
|
| 33 |
+
std: float
|
| 34 |
+
min: float
|
| 35 |
+
max: float
|
| 36 |
+
top_10_pct_avg: float # Avg of top 10%
|
| 37 |
+
bottom_10_pct_avg: float
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@dataclass
|
| 41 |
+
class DomainBreakdown:
|
| 42 |
+
"""Domain-wise student distribution"""
|
| 43 |
+
domain_id: str
|
| 44 |
+
display_name: str
|
| 45 |
+
count: int
|
| 46 |
+
percentage: float
|
| 47 |
+
avg_score: float
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@dataclass
|
| 51 |
+
class SkillGap:
|
| 52 |
+
"""Skill gap analysis result"""
|
| 53 |
+
skill: str
|
| 54 |
+
demand_score: float
|
| 55 |
+
students_with_skill: int
|
| 56 |
+
students_pct: float
|
| 57 |
+
gap_severity: str # 'critical', 'moderate', 'low'
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@dataclass
|
| 61 |
+
class BatchRecommendation:
|
| 62 |
+
"""Recommendation for batch improvement"""
|
| 63 |
+
category: str # 'curriculum', 'training', 'industry'
|
| 64 |
+
priority: str # 'high', 'medium', 'low'
|
| 65 |
+
recommendation: str
|
| 66 |
+
impact: str
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class BatchAggregationService:
|
| 70 |
+
"""
|
| 71 |
+
Aggregates individual student data into college-level macro reports
|
| 72 |
+
"""
|
| 73 |
+
|
| 74 |
+
# Thresholds
|
| 75 |
+
PLACEMENT_READY_THRESHOLD = 0.60
|
| 76 |
+
CRITICAL_GAP_THRESHOLD = 0.30 # < 30% students have skill
|
| 77 |
+
MODERATE_GAP_THRESHOLD = 0.50
|
| 78 |
+
|
| 79 |
+
def __init__(self):
|
| 80 |
+
# Industry demand mapping (can be loaded from external source)
|
| 81 |
+
self.industry_demands = {
|
| 82 |
+
'python': 0.90,
|
| 83 |
+
'sql': 0.85,
|
| 84 |
+
'java': 0.80,
|
| 85 |
+
'javascript': 0.75,
|
| 86 |
+
'machine_learning': 0.70,
|
| 87 |
+
'cloud': 0.85,
|
| 88 |
+
'devops': 0.75,
|
| 89 |
+
'data_analysis': 0.70,
|
| 90 |
+
'system_design': 0.65,
|
| 91 |
+
'communication': 0.80,
|
| 92 |
+
'leadership': 0.60,
|
| 93 |
+
'teamwork': 0.75
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
def aggregate_batch(self,
|
| 97 |
+
students: List[Dict[str, Any]],
|
| 98 |
+
college_name: str = "Unknown College",
|
| 99 |
+
batch_year: int = None) -> Dict[str, Any]:
|
| 100 |
+
"""
|
| 101 |
+
Generate comprehensive batch report from student data
|
| 102 |
+
|
| 103 |
+
Args:
|
| 104 |
+
students: List of student score packets (from scoring endpoint)
|
| 105 |
+
college_name: Name of the college
|
| 106 |
+
batch_year: Graduation year
|
| 107 |
+
|
| 108 |
+
Returns:
|
| 109 |
+
Complete macro analysis report
|
| 110 |
+
"""
|
| 111 |
+
if not students:
|
| 112 |
+
return self._empty_report(college_name, batch_year)
|
| 113 |
+
|
| 114 |
+
batch_year = batch_year or datetime.now().year
|
| 115 |
+
|
| 116 |
+
# Extract scores and features
|
| 117 |
+
scores = []
|
| 118 |
+
cgpas = []
|
| 119 |
+
internship_months = []
|
| 120 |
+
all_skills = []
|
| 121 |
+
domain_counts = Counter()
|
| 122 |
+
aspect_scores = {
|
| 123 |
+
'technical_skills': [],
|
| 124 |
+
'problem_solving': [],
|
| 125 |
+
'leadership': [],
|
| 126 |
+
'communication': [],
|
| 127 |
+
'teamwork': [],
|
| 128 |
+
'learning_agility': []
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
for student in students:
|
| 132 |
+
# Final score
|
| 133 |
+
final_score = student.get('final_score', 0)
|
| 134 |
+
scores.append(final_score)
|
| 135 |
+
|
| 136 |
+
# Features
|
| 137 |
+
features = student.get('detailed_features', {})
|
| 138 |
+
universal = features.get('universal', {})
|
| 139 |
+
text = features.get('text', {})
|
| 140 |
+
|
| 141 |
+
cgpas.append(universal.get('cgpa_norm', 0) * 10) # Denormalize
|
| 142 |
+
internship_months.append(universal.get('internship_exposure', 0) * 12)
|
| 143 |
+
|
| 144 |
+
# Domain
|
| 145 |
+
domain = student.get('domain_type') or student.get('detected_domain', 'general')
|
| 146 |
+
domain_counts[domain] += 1
|
| 147 |
+
|
| 148 |
+
# Aspect scores
|
| 149 |
+
for aspect in aspect_scores:
|
| 150 |
+
if aspect in text:
|
| 151 |
+
aspect_scores[aspect].append(text[aspect])
|
| 152 |
+
elif aspect in universal:
|
| 153 |
+
aspect_scores[aspect].append(universal[aspect])
|
| 154 |
+
|
| 155 |
+
# Skills (from raw student data if available)
|
| 156 |
+
if 'skills' in student:
|
| 157 |
+
skills = student['skills']
|
| 158 |
+
if isinstance(skills, str):
|
| 159 |
+
skills = [s.strip().lower() for s in skills.split(',')]
|
| 160 |
+
all_skills.extend(skills)
|
| 161 |
+
|
| 162 |
+
# Compute aggregates
|
| 163 |
+
aggregate = self._compute_aggregate_metrics(
|
| 164 |
+
scores, cgpas, internship_months, all_skills
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
# Aspect distributions
|
| 168 |
+
aspects = self._compute_aspect_distributions(aspect_scores)
|
| 169 |
+
|
| 170 |
+
# Domain breakdown
|
| 171 |
+
domains = self._compute_domain_breakdown(domain_counts, students)
|
| 172 |
+
|
| 173 |
+
# Skill gaps
|
| 174 |
+
skill_gaps = self._analyze_skill_gaps(all_skills, len(students))
|
| 175 |
+
|
| 176 |
+
# Recommendations
|
| 177 |
+
recommendations = self._generate_recommendations(
|
| 178 |
+
aggregate, aspects, skill_gaps
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
# Build report
|
| 182 |
+
report = {
|
| 183 |
+
'report_id': f"BATCH_{batch_year}_{college_name[:3].upper()}",
|
| 184 |
+
'college_name': college_name,
|
| 185 |
+
'batch_year': batch_year,
|
| 186 |
+
'generated_at': datetime.utcnow().isoformat() + 'Z',
|
| 187 |
+
'total_students': len(students),
|
| 188 |
+
|
| 189 |
+
'aggregate_metrics': asdict(aggregate),
|
| 190 |
+
|
| 191 |
+
'score_distribution': self._compute_score_distribution(scores),
|
| 192 |
+
|
| 193 |
+
'aspect_analysis': [asdict(a) for a in aspects],
|
| 194 |
+
|
| 195 |
+
'domain_breakdown': [asdict(d) for d in domains],
|
| 196 |
+
|
| 197 |
+
'skill_gap_analysis': [asdict(g) for g in skill_gaps],
|
| 198 |
+
|
| 199 |
+
'recommendations': [asdict(r) for r in recommendations],
|
| 200 |
+
|
| 201 |
+
'percentile_bands': self._compute_percentile_bands(scores)
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
return report
|
| 205 |
+
|
| 206 |
+
def _compute_aggregate_metrics(self, scores, cgpas, internship_months,
|
| 207 |
+
skills) -> AggregateMetrics:
|
| 208 |
+
"""Compute high-level aggregate metrics"""
|
| 209 |
+
scores_arr = np.array(scores)
|
| 210 |
+
|
| 211 |
+
placement_ready = sum(1 for s in scores if s >= self.PLACEMENT_READY_THRESHOLD)
|
| 212 |
+
placement_pct = (placement_ready / len(scores)) * 100 if scores else 0
|
| 213 |
+
|
| 214 |
+
return AggregateMetrics(
|
| 215 |
+
total_students=len(scores),
|
| 216 |
+
avg_employability_score=round(float(np.mean(scores_arr)), 3),
|
| 217 |
+
median_score=round(float(np.median(scores_arr)), 3),
|
| 218 |
+
std_dev=round(float(np.std(scores_arr)), 3),
|
| 219 |
+
placement_ready_pct=round(placement_pct, 1),
|
| 220 |
+
skill_diversity_index=len(set(skills)),
|
| 221 |
+
avg_cgpa=round(float(np.mean(cgpas)) if cgpas else 0, 2),
|
| 222 |
+
avg_internship_months=round(float(np.mean(internship_months)) if internship_months else 0, 1)
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
def _compute_aspect_distributions(self, aspect_scores) -> List[AspectDistribution]:
|
| 226 |
+
"""Compute distribution stats for each aspect"""
|
| 227 |
+
distributions = []
|
| 228 |
+
|
| 229 |
+
for aspect, scores in aspect_scores.items():
|
| 230 |
+
if not scores:
|
| 231 |
+
continue
|
| 232 |
+
|
| 233 |
+
arr = np.array(scores)
|
| 234 |
+
top_10_idx = int(len(arr) * 0.1) or 1
|
| 235 |
+
bottom_10_idx = int(len(arr) * 0.1) or 1
|
| 236 |
+
|
| 237 |
+
sorted_arr = np.sort(arr)
|
| 238 |
+
|
| 239 |
+
distributions.append(AspectDistribution(
|
| 240 |
+
aspect=aspect,
|
| 241 |
+
avg=round(float(np.mean(arr)), 3),
|
| 242 |
+
std=round(float(np.std(arr)), 3),
|
| 243 |
+
min=round(float(np.min(arr)), 3),
|
| 244 |
+
max=round(float(np.max(arr)), 3),
|
| 245 |
+
top_10_pct_avg=round(float(np.mean(sorted_arr[-top_10_idx:])), 3),
|
| 246 |
+
bottom_10_pct_avg=round(float(np.mean(sorted_arr[:bottom_10_idx])), 3)
|
| 247 |
+
))
|
| 248 |
+
|
| 249 |
+
return distributions
|
| 250 |
+
|
| 251 |
+
def _compute_domain_breakdown(self, domain_counts, students) -> List[DomainBreakdown]:
|
| 252 |
+
"""Compute domain-wise breakdown"""
|
| 253 |
+
breakdowns = []
|
| 254 |
+
total = len(students)
|
| 255 |
+
|
| 256 |
+
for domain, count in domain_counts.most_common():
|
| 257 |
+
# Calculate avg score for this domain
|
| 258 |
+
domain_scores = [
|
| 259 |
+
s.get('final_score', 0) for s in students
|
| 260 |
+
if (s.get('domain_type') or s.get('detected_domain', 'general')) == domain
|
| 261 |
+
]
|
| 262 |
+
avg_score = np.mean(domain_scores) if domain_scores else 0
|
| 263 |
+
|
| 264 |
+
breakdowns.append(DomainBreakdown(
|
| 265 |
+
domain_id=domain,
|
| 266 |
+
display_name=domain.replace('_', ' ').title(),
|
| 267 |
+
count=count,
|
| 268 |
+
percentage=round((count / total) * 100, 1),
|
| 269 |
+
avg_score=round(float(avg_score), 3)
|
| 270 |
+
))
|
| 271 |
+
|
| 272 |
+
return breakdowns
|
| 273 |
+
|
| 274 |
+
def _analyze_skill_gaps(self, all_skills, total_students) -> List[SkillGap]:
|
| 275 |
+
"""Analyze skill gaps against industry demand"""
|
| 276 |
+
skill_counts = Counter(all_skills)
|
| 277 |
+
gaps = []
|
| 278 |
+
|
| 279 |
+
for skill, demand in self.industry_demands.items():
|
| 280 |
+
count = skill_counts.get(skill, 0)
|
| 281 |
+
pct = (count / total_students) * 100 if total_students else 0
|
| 282 |
+
|
| 283 |
+
# Determine severity
|
| 284 |
+
if pct < self.CRITICAL_GAP_THRESHOLD * 100:
|
| 285 |
+
severity = 'critical'
|
| 286 |
+
elif pct < self.MODERATE_GAP_THRESHOLD * 100:
|
| 287 |
+
severity = 'moderate'
|
| 288 |
+
else:
|
| 289 |
+
severity = 'low'
|
| 290 |
+
|
| 291 |
+
gaps.append(SkillGap(
|
| 292 |
+
skill=skill,
|
| 293 |
+
demand_score=demand,
|
| 294 |
+
students_with_skill=count,
|
| 295 |
+
students_pct=round(pct, 1),
|
| 296 |
+
gap_severity=severity
|
| 297 |
+
))
|
| 298 |
+
|
| 299 |
+
# Sort by demand * (1 - coverage)
|
| 300 |
+
gaps.sort(key=lambda g: g.demand_score * (1 - g.students_pct/100), reverse=True)
|
| 301 |
+
|
| 302 |
+
return gaps[:10] # Top 10 gaps
|
| 303 |
+
|
| 304 |
+
def _generate_recommendations(self, aggregate, aspects,
|
| 305 |
+
skill_gaps) -> List[BatchRecommendation]:
|
| 306 |
+
"""Generate actionable recommendations"""
|
| 307 |
+
recommendations = []
|
| 308 |
+
|
| 309 |
+
# Critical skill gaps
|
| 310 |
+
critical_gaps = [g for g in skill_gaps if g.gap_severity == 'critical']
|
| 311 |
+
for gap in critical_gaps[:3]:
|
| 312 |
+
recommendations.append(BatchRecommendation(
|
| 313 |
+
category='curriculum',
|
| 314 |
+
priority='high',
|
| 315 |
+
recommendation=f"Add {gap.skill.replace('_', ' ').title()} training to curriculum",
|
| 316 |
+
impact=f"Only {gap.students_pct}% students have this in-demand skill"
|
| 317 |
+
))
|
| 318 |
+
|
| 319 |
+
# Low placement readiness
|
| 320 |
+
if aggregate.placement_ready_pct < 60:
|
| 321 |
+
recommendations.append(BatchRecommendation(
|
| 322 |
+
category='training',
|
| 323 |
+
priority='high',
|
| 324 |
+
recommendation="Implement intensive placement preparation program",
|
| 325 |
+
impact=f"Only {aggregate.placement_ready_pct}% students are placement-ready"
|
| 326 |
+
))
|
| 327 |
+
|
| 328 |
+
# Low internship exposure
|
| 329 |
+
if aggregate.avg_internship_months < 3:
|
| 330 |
+
recommendations.append(BatchRecommendation(
|
| 331 |
+
category='industry',
|
| 332 |
+
priority='medium',
|
| 333 |
+
recommendation="Establish mandatory internship partnerships with industry",
|
| 334 |
+
impact=f"Average internship exposure is only {aggregate.avg_internship_months} months"
|
| 335 |
+
))
|
| 336 |
+
|
| 337 |
+
# Weak aspects
|
| 338 |
+
for aspect in aspects:
|
| 339 |
+
if aspect.avg < 0.5:
|
| 340 |
+
recommendations.append(BatchRecommendation(
|
| 341 |
+
category='training',
|
| 342 |
+
priority='medium',
|
| 343 |
+
recommendation=f"Conduct workshops on {aspect.aspect.replace('_', ' ').title()}",
|
| 344 |
+
impact=f"Average score is {aspect.avg:.0%}, below acceptable threshold"
|
| 345 |
+
))
|
| 346 |
+
|
| 347 |
+
return recommendations[:8] # Limit to 8 recommendations
|
| 348 |
+
|
| 349 |
+
def _compute_score_distribution(self, scores) -> Dict[str, int]:
|
| 350 |
+
"""Compute score distribution by grade bands"""
|
| 351 |
+
distribution = {
|
| 352 |
+
'A+ (90-100%)': 0,
|
| 353 |
+
'A (80-90%)': 0,
|
| 354 |
+
'B+ (70-80%)': 0,
|
| 355 |
+
'B (60-70%)': 0,
|
| 356 |
+
'C (50-60%)': 0,
|
| 357 |
+
'D (<50%)': 0
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
for score in scores:
|
| 361 |
+
pct = score * 100
|
| 362 |
+
if pct >= 90:
|
| 363 |
+
distribution['A+ (90-100%)'] += 1
|
| 364 |
+
elif pct >= 80:
|
| 365 |
+
distribution['A (80-90%)'] += 1
|
| 366 |
+
elif pct >= 70:
|
| 367 |
+
distribution['B+ (70-80%)'] += 1
|
| 368 |
+
elif pct >= 60:
|
| 369 |
+
distribution['B (60-70%)'] += 1
|
| 370 |
+
elif pct >= 50:
|
| 371 |
+
distribution['C (50-60%)'] += 1
|
| 372 |
+
else:
|
| 373 |
+
distribution['D (<50%)'] += 1
|
| 374 |
+
|
| 375 |
+
return distribution
|
| 376 |
+
|
| 377 |
+
def _compute_percentile_bands(self, scores) -> Dict[str, float]:
|
| 378 |
+
"""Compute percentile thresholds"""
|
| 379 |
+
if not scores:
|
| 380 |
+
return {}
|
| 381 |
+
|
| 382 |
+
arr = np.array(scores)
|
| 383 |
+
return {
|
| 384 |
+
'p10': round(float(np.percentile(arr, 10)), 3),
|
| 385 |
+
'p25': round(float(np.percentile(arr, 25)), 3),
|
| 386 |
+
'p50': round(float(np.percentile(arr, 50)), 3),
|
| 387 |
+
'p75': round(float(np.percentile(arr, 75)), 3),
|
| 388 |
+
'p90': round(float(np.percentile(arr, 90)), 3)
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
def _empty_report(self, college_name: str, batch_year: int) -> Dict[str, Any]:
|
| 392 |
+
"""Generate empty report for no data"""
|
| 393 |
+
return {
|
| 394 |
+
'report_id': f"BATCH_{batch_year or 'UNKNOWN'}_{college_name[:3].upper()}",
|
| 395 |
+
'college_name': college_name,
|
| 396 |
+
'batch_year': batch_year,
|
| 397 |
+
'generated_at': datetime.utcnow().isoformat() + 'Z',
|
| 398 |
+
'total_students': 0,
|
| 399 |
+
'error': 'No student data provided',
|
| 400 |
+
'aggregate_metrics': None,
|
| 401 |
+
'recommendations': []
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
# Module-level singleton holder for the batch aggregation service.
_batch_service: Optional[BatchAggregationService] = None


def get_batch_aggregation_service() -> BatchAggregationService:
    """Return the process-wide BatchAggregationService, creating it on first use."""
    global _batch_service
    service = _batch_service
    if service is None:
        service = BatchAggregationService()
        _batch_service = service
    return service
|
services/domain_knowledge_base.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Domain Knowledge Base - Dynamic domain-specific aspect prototypes and skill mapping
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
import logging
|
| 7 |
+
from typing import Dict, List, Optional, Tuple
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class DomainConfig:
|
| 14 |
+
"""Single domain configuration"""
|
| 15 |
+
|
| 16 |
+
def __init__(self, config_data: Dict):
|
| 17 |
+
self.domain_id = config_data.get('domain_id', 'unknown')
|
| 18 |
+
self.display_name = config_data.get('display_name', 'Unknown Domain')
|
| 19 |
+
self.description = config_data.get('description', '')
|
| 20 |
+
self.core_skills = config_data.get('core_skills', [])
|
| 21 |
+
self.aspect_prototypes = config_data.get('aspect_prototypes', {})
|
| 22 |
+
self.industry_benchmarks = config_data.get('industry_benchmarks', {})
|
| 23 |
+
self.skill_gaps_mapping = config_data.get('skill_gaps_mapping', {})
|
| 24 |
+
self.detection_keywords = config_data.get('detection_keywords', [])
|
| 25 |
+
|
| 26 |
+
def get_aspect_seeds(self, aspect: str) -> List[str]:
|
| 27 |
+
"""Get seed phrases for a specific aspect"""
|
| 28 |
+
return self.aspect_prototypes.get(aspect, [])
|
| 29 |
+
|
| 30 |
+
def get_all_aspect_seeds(self) -> Dict[str, List[str]]:
|
| 31 |
+
"""Get all aspect prototypes"""
|
| 32 |
+
return self.aspect_prototypes.copy()
|
| 33 |
+
|
| 34 |
+
def get_skill_gap_info(self, skill: str) -> Optional[Dict]:
|
| 35 |
+
"""Get skill gap information including courses and certs"""
|
| 36 |
+
return self.skill_gaps_mapping.get(skill)
|
| 37 |
+
|
| 38 |
+
def get_benchmark(self, key: str, default=None):
|
| 39 |
+
"""Get industry benchmark value"""
|
| 40 |
+
return self.industry_benchmarks.get(key, default)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class DomainKnowledgeBase:
|
| 44 |
+
"""
|
| 45 |
+
Domain Knowledge Base - loads and manages domain configurations
|
| 46 |
+
Provides domain-specific aspect prototypes for the Fidelity Criteria Transformer
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
def __init__(self, domains_dir: str = None):
|
| 50 |
+
"""
|
| 51 |
+
Initialize DKB with domain configs from directory
|
| 52 |
+
|
| 53 |
+
Args:
|
| 54 |
+
domains_dir: Path to directory containing domain JSON files
|
| 55 |
+
Defaults to ./domains/ relative to this file
|
| 56 |
+
"""
|
| 57 |
+
if domains_dir is None:
|
| 58 |
+
domains_dir = os.path.join(os.path.dirname(__file__), 'domains')
|
| 59 |
+
|
| 60 |
+
self.domains_dir = Path(domains_dir)
|
| 61 |
+
self.domains: Dict[str, DomainConfig] = {}
|
| 62 |
+
self._keyword_index: Dict[str, str] = {} # keyword -> domain_id
|
| 63 |
+
|
| 64 |
+
self._load_all_domains()
|
| 65 |
+
self._build_keyword_index()
|
| 66 |
+
|
| 67 |
+
logger.info(f"DomainKnowledgeBase initialized with {len(self.domains)} domains")
|
| 68 |
+
|
| 69 |
+
def _load_all_domains(self):
|
| 70 |
+
"""Load all domain configs from directory"""
|
| 71 |
+
if not self.domains_dir.exists():
|
| 72 |
+
logger.warning(f"Domains directory not found: {self.domains_dir}")
|
| 73 |
+
return
|
| 74 |
+
|
| 75 |
+
for json_file in self.domains_dir.glob('*.json'):
|
| 76 |
+
try:
|
| 77 |
+
with open(json_file, 'r', encoding='utf-8') as f:
|
| 78 |
+
config_data = json.load(f)
|
| 79 |
+
|
| 80 |
+
domain_config = DomainConfig(config_data)
|
| 81 |
+
self.domains[domain_config.domain_id] = domain_config
|
| 82 |
+
logger.info(f"Loaded domain config: {domain_config.display_name}")
|
| 83 |
+
|
| 84 |
+
except Exception as e:
|
| 85 |
+
logger.error(f"Failed to load domain config {json_file}: {e}")
|
| 86 |
+
|
| 87 |
+
def _build_keyword_index(self):
|
| 88 |
+
"""Build keyword -> domain mapping for detection"""
|
| 89 |
+
for domain_id, config in self.domains.items():
|
| 90 |
+
for keyword in config.detection_keywords:
|
| 91 |
+
self._keyword_index[keyword.lower()] = domain_id
|
| 92 |
+
|
| 93 |
+
def get_domain(self, domain_id: str) -> Optional[DomainConfig]:
|
| 94 |
+
"""Get domain config by ID"""
|
| 95 |
+
return self.domains.get(domain_id)
|
| 96 |
+
|
| 97 |
+
def list_domains(self) -> List[str]:
|
| 98 |
+
"""List all available domain IDs"""
|
| 99 |
+
return list(self.domains.keys())
|
| 100 |
+
|
| 101 |
+
def detect_domain(self, text: str, skills: List[str] = None) -> Tuple[str, float]:
|
| 102 |
+
"""
|
| 103 |
+
Detect most likely domain from text and/or skills
|
| 104 |
+
|
| 105 |
+
Args:
|
| 106 |
+
text: Text content (career goals, descriptions, etc.)
|
| 107 |
+
skills: List of skill keywords
|
| 108 |
+
|
| 109 |
+
Returns:
|
| 110 |
+
(domain_id, confidence) tuple
|
| 111 |
+
"""
|
| 112 |
+
if not text and not skills:
|
| 113 |
+
return ('general', 0.0)
|
| 114 |
+
|
| 115 |
+
text_lower = (text or '').lower()
|
| 116 |
+
skills_lower = [s.lower() for s in (skills or [])]
|
| 117 |
+
|
| 118 |
+
domain_scores = {}
|
| 119 |
+
|
| 120 |
+
for domain_id, config in self.domains.items():
|
| 121 |
+
score = 0.0
|
| 122 |
+
|
| 123 |
+
# Keyword matching from text
|
| 124 |
+
for keyword in config.detection_keywords:
|
| 125 |
+
if keyword.lower() in text_lower:
|
| 126 |
+
score += 0.1
|
| 127 |
+
|
| 128 |
+
# Skill matching
|
| 129 |
+
core_skills_lower = [s.lower() for s in config.core_skills]
|
| 130 |
+
skill_matches = sum(1 for s in skills_lower if s in core_skills_lower)
|
| 131 |
+
score += skill_matches * 0.15
|
| 132 |
+
|
| 133 |
+
domain_scores[domain_id] = min(score, 1.0)
|
| 134 |
+
|
| 135 |
+
if not domain_scores:
|
| 136 |
+
return ('general', 0.0)
|
| 137 |
+
|
| 138 |
+
# Return domain with highest score
|
| 139 |
+
best_domain = max(domain_scores, key=domain_scores.get)
|
| 140 |
+
confidence = domain_scores[best_domain]
|
| 141 |
+
|
| 142 |
+
# Minimum confidence threshold
|
| 143 |
+
if confidence < 0.2:
|
| 144 |
+
return ('general', confidence)
|
| 145 |
+
|
| 146 |
+
return (best_domain, confidence)
|
| 147 |
+
|
| 148 |
+
def get_aspect_prototypes_for_domain(self, domain_id: str) -> Dict[str, List[str]]:
|
| 149 |
+
"""Get all aspect prototypes for a domain"""
|
| 150 |
+
config = self.domains.get(domain_id)
|
| 151 |
+
if config:
|
| 152 |
+
return config.get_all_aspect_seeds()
|
| 153 |
+
return {}
|
| 154 |
+
|
| 155 |
+
def get_merged_prototypes(self, detected_domain: str,
|
| 156 |
+
base_aspects: Dict[str, List[str]]) -> Dict[str, List[str]]:
|
| 157 |
+
"""
|
| 158 |
+
Merge domain-specific prototypes with base aspects
|
| 159 |
+
Domain-specific seeds are added to base seeds
|
| 160 |
+
|
| 161 |
+
Args:
|
| 162 |
+
detected_domain: Domain ID from detection
|
| 163 |
+
base_aspects: Base aspect seeds (from TextModuleV2 defaults)
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
Merged aspect seeds dictionary
|
| 167 |
+
"""
|
| 168 |
+
merged = {k: list(v) for k, v in base_aspects.items()} # Deep copy
|
| 169 |
+
|
| 170 |
+
domain_config = self.domains.get(detected_domain)
|
| 171 |
+
if not domain_config:
|
| 172 |
+
return merged
|
| 173 |
+
|
| 174 |
+
# Merge domain-specific prototypes
|
| 175 |
+
for aspect, seeds in domain_config.aspect_prototypes.items():
|
| 176 |
+
if aspect in merged:
|
| 177 |
+
# Prepend domain-specific seeds (higher priority)
|
| 178 |
+
merged[aspect] = seeds + merged[aspect]
|
| 179 |
+
else:
|
| 180 |
+
merged[aspect] = seeds
|
| 181 |
+
|
| 182 |
+
return merged
|
| 183 |
+
|
| 184 |
+
def analyze_skill_gaps(self, student_skills: List[str],
|
| 185 |
+
domain_id: str) -> List[Dict]:
|
| 186 |
+
"""
|
| 187 |
+
Analyze skill gaps for a student in a given domain
|
| 188 |
+
|
| 189 |
+
Args:
|
| 190 |
+
student_skills: List of skills the student has
|
| 191 |
+
domain_id: Target domain
|
| 192 |
+
|
| 193 |
+
Returns:
|
| 194 |
+
List of skill gap objects with recommendations
|
| 195 |
+
"""
|
| 196 |
+
config = self.domains.get(domain_id)
|
| 197 |
+
if not config:
|
| 198 |
+
return []
|
| 199 |
+
|
| 200 |
+
student_skills_lower = [s.lower() for s in student_skills]
|
| 201 |
+
gaps = []
|
| 202 |
+
|
| 203 |
+
for skill, gap_info in config.skill_gaps_mapping.items():
|
| 204 |
+
skill_lower = skill.lower()
|
| 205 |
+
|
| 206 |
+
# Check if student has this skill
|
| 207 |
+
has_skill = any(skill_lower in s or s in skill_lower
|
| 208 |
+
for s in student_skills_lower)
|
| 209 |
+
|
| 210 |
+
if not has_skill:
|
| 211 |
+
gaps.append({
|
| 212 |
+
'skill': skill,
|
| 213 |
+
'demand_score': gap_info.get('demand_score', 0.5),
|
| 214 |
+
'recommended_courses': gap_info.get('courses', []),
|
| 215 |
+
'certifications': gap_info.get('certifications', []),
|
| 216 |
+
'priority': 'high' if gap_info.get('demand_score', 0) > 0.7 else 'medium'
|
| 217 |
+
})
|
| 218 |
+
|
| 219 |
+
# Sort by demand score
|
| 220 |
+
gaps.sort(key=lambda x: x['demand_score'], reverse=True)
|
| 221 |
+
return gaps
|
| 222 |
+
|
| 223 |
+
def get_domain_summary(self, domain_id: str) -> Optional[Dict]:
    """Build a compact summary of a configured domain for reporting.

    Returns None when ``domain_id`` is not registered.
    """
    config = self.domains.get(domain_id)
    if not config:
        return None

    summary = {
        'domain_id': config.domain_id,
        'display_name': config.display_name,
        'description': config.description,
        'core_skills_count': len(config.core_skills),
        'aspects_count': len(config.aspect_prototypes),
        'benchmarks': config.industry_benchmarks
    }
    return summary
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
# Singleton instance
|
| 240 |
+
_dkb_instance: Optional[DomainKnowledgeBase] = None
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def get_domain_knowledge_base(domains_dir: str = None) -> DomainKnowledgeBase:
    """Return the process-wide DomainKnowledgeBase, creating it on first use.

    Note: ``domains_dir`` only takes effect on the very first call; later
    calls return the already-built singleton unchanged.
    """
    global _dkb_instance

    if _dkb_instance is None:
        _dkb_instance = DomainKnowledgeBase(domains_dir)

    return _dkb_instance
|
services/domain_plugins/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Domain-Specific Plugins Module
|
| 2 |
+
|
| 3 |
+
Pluggable architecture for domain-specific scoring (Tech, Business, Creative, Research).
|
| 4 |
+
Each plugin returns domain_score, domain_confidence, and raw features.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from .base_plugin import BaseDomainPlugin
|
| 8 |
+
from .plugin_factory import DomainPluginFactory
|
| 9 |
+
|
| 10 |
+
__all__ = ['BaseDomainPlugin', 'DomainPluginFactory']
|
services/domain_plugins/base_plugin.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Base Domain Plugin Interface"""
|
| 2 |
+
from abc import ABC, abstractmethod
|
| 3 |
+
from typing import Dict, Tuple, List, Optional
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
|
| 6 |
+
@dataclass
class DomainScore:
    """Normalized result of one domain plugin's scoring pass."""
    domain_type: str           # e.g. 'tech', 'business'
    score: float               # overall domain score in [0, 1]
    confidence: float          # data-completeness confidence in [0, 1]
    raw_features: Dict         # per-feature values for explainability
    processing_time_ms: float  # wall-clock scoring duration

    def to_dict(self):
        """Serialize to a JSON-friendly dict, rounding numeric fields."""
        payload = dict(
            domain_type=self.domain_type,
            score=round(self.score, 3),
            confidence=round(self.confidence, 3),
            raw_features=self.raw_features,
            processing_time_ms=round(self.processing_time_ms, 2),
        )
        return payload
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class BaseDomainPlugin(ABC):
    """Abstract base class for all domain plugins.

    Subclasses declare their identity, feature weights and expected input
    fields; this base supplies input validation, a generic explanation
    helper and a completeness-based confidence estimate.
    """

    def __init__(self):
        # Resolve the subclass hooks once at construction time.
        self.domain_type = self._get_domain_type()
        self.feature_weights = self._get_feature_weights()

    @abstractmethod
    def _get_domain_type(self) -> str:
        """Return domain identifier (e.g., 'tech', 'business')"""
        pass

    @abstractmethod
    def _get_feature_weights(self) -> Dict[str, float]:
        """Return feature name to weight mapping"""
        pass

    @abstractmethod
    def get_required_fields(self) -> List[str]:
        """Return list of required input fields for this domain"""
        pass

    @abstractmethod
    def get_optional_fields(self) -> List[str]:
        """Return list of optional input fields"""
        pass

    def validate_inputs(self, evidence_data: Dict) -> Tuple[bool, Optional[str]]:
        """
        Validate input data completeness.

        A field counts as missing when its value is falsy ('' / None / 0 / []),
        not merely when the key is absent.

        Returns: (is_valid, error_message); error_message is None when valid.
        """
        required = self.get_required_fields()
        missing = [f for f in required if not evidence_data.get(f)]

        if missing:
            return False, f"Missing required fields: {', '.join(missing)}"

        return True, None

    @abstractmethod
    def score(self, evidence_data: Dict) -> "DomainScore":
        """
        Main scoring method - must be implemented by each plugin

        Args:
            evidence_data: Dictionary containing domain-specific inputs

        Returns:
            DomainScore object with score, confidence, and features
        """
        pass

    def explain(self, features: Dict) -> Dict:
        """Generate human-readable explanation of scoring.

        Lists up to three of the highest-valued features (those above 0.3)
        with their configured weights. 'recommendations' is left empty here
        for subclasses to populate.
        """
        explanations = {
            'top_features': [],
            'recommendations': []
        }

        # Highest-valued features first.
        sorted_features = sorted(features.items(), key=lambda x: x[1], reverse=True)

        # Top 3 features above the 0.3 relevance threshold.
        for feat, val in sorted_features[:3]:
            if val > 0.3:
                explanations['top_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'weight': self.feature_weights.get(feat, 0)
                })

        return explanations

    def calculate_confidence(self, evidence_data: Dict) -> float:
        """
        Calculate confidence based on data completeness and quality.

        Required-field coverage contributes up to 0.7, optional-field
        coverage up to 0.3; a plugin with no fields of a kind gets that
        portion for free.

        Returns: 0-1 confidence score
        """
        required_fields = self.get_required_fields()
        optional_fields = self.get_optional_fields()

        filled_required = sum(1 for f in required_fields if evidence_data.get(f))
        filled_optional = sum(1 for f in optional_fields if evidence_data.get(f))

        # Base confidence from required fields (70%)
        required_confidence = (filled_required / len(required_fields)) * 0.7 if required_fields else 0.7

        # Bonus from optional fields (30%)
        optional_confidence = (filled_optional / len(optional_fields)) * 0.3 if optional_fields else 0.3

        return min(required_confidence + optional_confidence, 1.0)
|
services/domain_plugins/business_plugin.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Business/Finance Domain Plugin
|
| 2 |
+
|
| 3 |
+
Scores business competency based on:
|
| 4 |
+
- Resume content (ATS-style keyword matching)
|
| 5 |
+
- Case study submission analysis
|
| 6 |
+
- Excel/analytical test scores
|
| 7 |
+
- Internship experience in business domains
|
| 8 |
+
"""
|
| 9 |
+
import re
|
| 10 |
+
import time
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Dict, List
|
| 13 |
+
from .base_plugin import BaseDomainPlugin, DomainScore
|
| 14 |
+
from .plugin_factory import register_plugin
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@register_plugin('business')
class BusinessPlugin(BaseDomainPlugin):
    """Business/Finance domain scoring plugin.

    Scores business competency from:
    - Resume content (ATS-style keyword matching)
    - Case study submission analysis
    - Excel/analytical test scores
    - Internship experience in business domains
    """

    def __init__(self):
        super().__init__()
        # Business-relevant keywords, grouped by functional area.
        self.business_keywords = {
            'consulting': ['consulting', 'consultant', 'advisory', 'strategy', 'mckinsey', 'bain', 'bcg'],
            'finance': ['finance', 'banking', 'investment', 'equity', 'portfolio', 'analyst', 'goldman', 'morgan'],
            'analytics': ['data analysis', 'business intelligence', 'tableau', 'power bi', 'sql', 'excel'],
            'management': ['project management', 'product management', 'stakeholder', 'agile', 'scrum'],
            'sales': ['sales', 'business development', 'client acquisition', 'revenue', 'crm'],
            'operations': ['operations', 'supply chain', 'logistics', 'process improvement', 'lean', 'six sigma']
        }

    def _get_domain_type(self) -> str:
        return 'business'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Weights sum to 1.0; resume evidence dominates.
        return {
            'resume_keyword_score': 0.30,
            'internship_relevance': 0.25,
            'case_study_score': 0.20,
            'excel_test_score': 0.15,
            'business_depth': 0.10
        }

    def get_required_fields(self) -> List[str]:
        return ['resume_text']  # Resume text (extracted from PDF)

    def get_optional_fields(self) -> List[str]:
        return ['case_study_text', 'excel_test_score', 'internship_descriptions']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate the weighted business domain score.

        Args:
            evidence_data: Dict with 'resume_text' (required) plus optional
                'case_study_text' and 'excel_test_score' (0-100).

        Returns:
            DomainScore with score/confidence in [0, 1] and raw features.
        """
        start_time = time.time()
        features = {}

        # Resume keyword analysis; all three resume-derived features
        # default to 0 when no resume text is supplied.
        resume_text = evidence_data.get('resume_text', '')
        if resume_text:
            features['resume_keyword_score'] = self._analyze_resume_keywords(resume_text)
            features['internship_relevance'] = self._extract_internship_relevance(resume_text)
            features['business_depth'] = self._assess_business_depth(resume_text)
        else:
            features['resume_keyword_score'] = 0.0
            features['internship_relevance'] = 0.0
            features['business_depth'] = 0.0

        # Case study analysis
        case_study = evidence_data.get('case_study_text', '')
        if case_study:
            features['case_study_score'] = self._analyze_case_study(case_study)
        else:
            features['case_study_score'] = 0.0

        # Excel test score (normalized 0-100 to 0-1)
        excel_score = evidence_data.get('excel_test_score', 0)
        features['excel_test_score'] = min(excel_score / 100, 1.0) if excel_score else 0.0

        # Weighted sum; iterate items() directly instead of .keys() + lookup.
        score = sum(value * self.feature_weights[name] for name, value in features.items())

        # Confidence reflects input completeness, not score quality.
        confidence = self.calculate_confidence(evidence_data)

        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='business',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_resume_keywords(self, resume_text: str) -> float:
        """
        ATS-style keyword matching for business roles
        Returns: 0-1 score based on keyword density and relevance
        """
        text_lower = resume_text.lower()

        # Fraction of each category's keywords present in the resume.
        category_scores = {}
        for category, keywords in self.business_keywords.items():
            matches = sum(1 for kw in keywords if kw in text_lower)
            category_scores[category] = min(matches / len(keywords), 1.0)

        # Average across categories with some categories weighted more
        weights = {
            'consulting': 0.20,
            'finance': 0.20,
            'analytics': 0.20,
            'management': 0.15,
            'sales': 0.15,
            'operations': 0.10
        }

        score = sum(category_scores.get(cat, 0) * weight for cat, weight in weights.items())

        logger.info(f"Resume keyword score: {score:.2f} (categories: {category_scores})")
        return score

    def _extract_internship_relevance(self, resume_text: str) -> float:
        """
        Extract and score internship relevance to business
        Returns: 0-1 score based on business-related internships
        """
        text_lower = resume_text.lower()

        # Internship indicators (groups capture the employer/role context).
        internship_patterns = [
            r'intern(?:ship)?\s+at\s+([^\n]+)',
            r'(?:summer|winter)\s+intern',
            r'([a-z\s]+)\s+intern'
        ]

        internship_mentions = []
        for pattern in internship_patterns:
            matches = re.findall(pattern, text_lower)
            internship_mentions.extend(matches)

        if not internship_mentions:
            return 0.0

        # Score based on business keyword overlap in internship context.
        # +0.2 per matched keyword category, capped at 1.0 overall.
        business_internship_score = 0.0
        for mention in internship_mentions[:5]:  # Top 5 internships
            # re.findall can yield tuples when a pattern has multiple groups.
            mention_text = mention if isinstance(mention, str) else ' '.join(mention)
            for keywords in self.business_keywords.values():
                if any(kw in mention_text for kw in keywords):
                    business_internship_score += 0.2

        score = min(business_internship_score, 1.0)
        logger.info(f"Internship relevance: {score:.2f}")
        return score

    def _assess_business_depth(self, resume_text: str) -> float:
        """
        Assess overall business knowledge depth
        Returns: 0-1 score based on technical business terms
        """
        text_lower = resume_text.lower()

        # Advanced business terms signalling real domain exposure.
        advanced_terms = [
            'financial modeling', 'valuation', 'dcf', 'market research',
            'competitive analysis', 'business plan', 'roi', 'kpi',
            'p&l', 'balance sheet', 'cash flow', 'stakeholder management',
            'go-to-market', 'pricing strategy', 'market segmentation'
        ]

        term_count = sum(1 for term in advanced_terms if term in text_lower)
        score = min(term_count / 10, 1.0)  # 10+ terms = max

        logger.info(f"Business depth score: {score:.2f} ({term_count} advanced terms)")
        return score

    def _analyze_case_study(self, case_study_text: str) -> float:
        """
        Analyze case study submission quality
        Returns: 0-1 score = structure (<=0.4) + analytical depth (<=0.3)
        + length proxy (<=0.3); submissions under 100 chars score 0.
        """
        if not case_study_text or len(case_study_text) < 100:
            return 0.0

        score = 0.0
        text_lower = case_study_text.lower()

        # Structure indicators
        structure_keywords = ['problem', 'analysis', 'solution', 'recommendation', 'conclusion']
        structure_score = sum(0.1 for kw in structure_keywords if kw in text_lower)
        score += min(structure_score, 0.4)

        # Analytical depth
        analytical_terms = ['data', 'metric', 'assumption', 'framework', 'hypothesis', 'evidence']
        analytical_score = sum(0.05 for term in analytical_terms if term in text_lower)
        score += min(analytical_score, 0.3)

        # Length (quality proxy)
        length_score = min(len(case_study_text) / 2000, 0.3)  # 2000+ chars = max
        score += length_score

        logger.info(f"Case study score: {score:.2f}")
        return min(score, 1.0)
|
services/domain_plugins/creative_plugin.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Creative/Design Domain Plugin
|
| 2 |
+
|
| 3 |
+
Scores creative competency based on:
|
| 4 |
+
- Portfolio links (Behance, Dribbble, personal site)
|
| 5 |
+
- Project diversity and quality
|
| 6 |
+
- Design tool proficiency
|
| 7 |
+
- Visual content analysis
|
| 8 |
+
"""
|
| 9 |
+
import re
|
| 10 |
+
import time
|
| 11 |
+
import logging
|
| 12 |
+
import requests
|
| 13 |
+
from typing import Dict, List
|
| 14 |
+
from .base_plugin import BaseDomainPlugin, DomainScore
|
| 15 |
+
from .plugin_factory import register_plugin
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@register_plugin('creative')
class CreativePlugin(BaseDomainPlugin):
    """Creative/Design domain scoring plugin.

    Scores creative competency from portfolio accessibility, presence on
    design platforms (Behance/Dribbble), design-tool proficiency and the
    diversity/depth of described projects.
    """

    def __init__(self):
        super().__init__()
        # Design tools and platforms
        self.design_tools = [
            'figma', 'sketch', 'adobe xd', 'photoshop', 'illustrator',
            'after effects', 'premiere pro', 'blender', 'cinema 4d'
        ]
        self.portfolio_platforms = ['behance', 'dribbble', 'artstation', 'deviantart']

    def _get_domain_type(self) -> str:
        return 'creative'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Weights sum to 1.0; portfolio evidence dominates.
        return {
            'portfolio_quality': 0.35,
            'project_diversity': 0.25,
            'tool_proficiency': 0.20,
            'platform_presence': 0.15,
            'description_depth': 0.05
        }

    def get_required_fields(self) -> List[str]:
        return ['portfolio_url']

    def get_optional_fields(self) -> List[str]:
        return ['behance_url', 'dribbble_url', 'design_tools_text', 'project_description']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate the weighted creative domain score.

        Args:
            evidence_data: Dict with 'portfolio_url' (required) plus
                optional platform URLs, tools text and project description.

        Returns:
            DomainScore with score/confidence in [0, 1] and raw features.
        """
        start_time = time.time()
        features = {}

        # Portfolio analysis (involves a network HEAD request).
        portfolio_url = evidence_data.get('portfolio_url', '')
        if portfolio_url:
            features['portfolio_quality'] = self._analyze_portfolio_quality(portfolio_url)
        else:
            features['portfolio_quality'] = 0.0

        # Platform presence
        behance_url = evidence_data.get('behance_url', '')
        dribbble_url = evidence_data.get('dribbble_url', '')
        features['platform_presence'] = self._check_platform_presence(behance_url, dribbble_url)

        # Tool proficiency
        tools_text = evidence_data.get('design_tools_text', '')
        features['tool_proficiency'] = self._assess_tool_proficiency(tools_text)

        # Project diversity and description
        project_desc = evidence_data.get('project_description', '')
        features['project_diversity'] = self._assess_project_diversity(project_desc)
        features['description_depth'] = self._assess_description_depth(project_desc)

        # Weighted sum; iterate items() directly instead of .keys() + lookup.
        score = sum(value * self.feature_weights[name] for name, value in features.items())

        # Confidence reflects input completeness, not score quality.
        confidence = self.calculate_confidence(evidence_data)

        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='creative',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_portfolio_quality(self, portfolio_url: str) -> float:
        """
        Analyze portfolio website quality
        Returns: 0-1 score based on accessibility and professionalism
        """
        try:
            if not portfolio_url.startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            response = requests.head(portfolio_url, timeout=5, allow_redirects=True)

            if response.status_code == 200:
                score = 0.6  # Base score for accessible portfolio

                # Bonus for professional platforms
                if any(platform in portfolio_url for platform in self.portfolio_platforms):
                    score += 0.2

                # Bonus for custom domain
                if not any(free in portfolio_url for free in ['github.io', 'wixsite', 'wordpress.com']):
                    score += 0.2

                logger.info(f"Portfolio quality: {score:.2f}")
                return min(score, 1.0)
            else:
                # Reachable host but non-200: minimal credit.
                return 0.2

        except Exception as e:
            # Network/DNS failures: minimal credit rather than a crash.
            logger.error(f"Error analyzing portfolio: {e}")
            return 0.2

    def _check_platform_presence(self, behance_url: str, dribbble_url: str) -> float:
        """
        Check presence on design platforms
        Returns: 0-1 score based on platform profiles
        """
        score = 0.0

        # Behance presence
        if behance_url and 'behance.net' in behance_url:
            try:
                response = requests.head(behance_url, timeout=5, allow_redirects=True)
                if response.status_code == 200:
                    score += 0.5
            # Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit (E722).
            except requests.RequestException:
                score += 0.2  # Partial credit for providing URL

        # Dribbble presence
        if dribbble_url and 'dribbble.com' in dribbble_url:
            try:
                response = requests.head(dribbble_url, timeout=5, allow_redirects=True)
                if response.status_code == 200:
                    score += 0.5
            except requests.RequestException:
                score += 0.2

        logger.info(f"Platform presence: {score:.2f}")
        return min(score, 1.0)

    def _assess_tool_proficiency(self, tools_text: str) -> float:
        """
        Assess design tool proficiency
        Returns: 0-1 score based on tool mentions
        """
        if not tools_text:
            return 0.0

        text_lower = tools_text.lower()

        # Count tool mentions
        tool_count = sum(1 for tool in self.design_tools if tool in text_lower)

        # Score based on tool diversity
        score = min(tool_count / 5, 1.0)  # 5+ tools = max

        # Bonus for professional tools (Adobe, Figma)
        pro_tools = ['figma', 'adobe', 'sketch']
        if any(tool in text_lower for tool in pro_tools):
            score = min(score + 0.2, 1.0)

        logger.info(f"Tool proficiency: {score:.2f} ({tool_count} tools)")
        return score

    def _assess_project_diversity(self, project_desc: str) -> float:
        """
        Assess project type diversity
        Returns: 0-1 score based on project variety
        """
        if not project_desc:
            return 0.0

        text_lower = project_desc.lower()

        # Project type categories
        project_types = [
            'ui design', 'ux design', 'branding', 'logo', 'illustration',
            'animation', '3d', 'web design', 'mobile app', 'poster',
            'packaging', 'typography', 'infographic', 'video editing'
        ]

        type_count = sum(1 for ptype in project_types if ptype in text_lower)
        score = min(type_count / 6, 1.0)  # 6+ types = max

        logger.info(f"Project diversity: {score:.2f} ({type_count} types)")
        return score

    def _assess_description_depth(self, project_desc: str) -> float:
        """
        Assess depth of project descriptions
        Returns: 0-1 score based on detail level; under 50 chars scores 0.
        """
        if not project_desc or len(project_desc) < 50:
            return 0.0

        score = min(len(project_desc) / 1000, 1.0)  # 1000+ chars = max

        logger.info(f"Description depth: {score:.2f}")
        return score
|
services/domain_plugins/plugin_factory.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Plugin Factory for Domain-Specific Scoring"""
|
| 2 |
+
import logging
|
| 3 |
+
from typing import Dict, Optional, List
|
| 4 |
+
from .base_plugin import BaseDomainPlugin
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DomainPluginFactory:
    """Registry and singleton factory for domain-scoring plugins."""

    # Registered plugin classes, keyed by domain type.
    _plugins: Dict[str, type] = {}
    # Lazily-built singleton instances, keyed by domain type.
    _instances: Dict[str, BaseDomainPlugin] = {}

    @classmethod
    def register(cls, domain_type: str, plugin_class: type):
        """Register *plugin_class* under *domain_type*; rejects non-plugins."""
        if not issubclass(plugin_class, BaseDomainPlugin):
            raise TypeError(f"{plugin_class} must inherit from BaseDomainPlugin")

        cls._plugins[domain_type] = plugin_class
        logger.info(f"Registered domain plugin: {domain_type}")

    @classmethod
    def get_plugin(cls, domain_type: str) -> Optional[BaseDomainPlugin]:
        """Return the shared plugin instance, creating it on first request."""
        plugin_cls = cls._plugins.get(domain_type)
        if plugin_cls is None:
            logger.warning(f"Plugin not found: {domain_type}")
            return None

        instance = cls._instances.get(domain_type)
        if instance is None:
            instance = plugin_cls()
            cls._instances[domain_type] = instance
        return instance

    @classmethod
    def list_available_domains(cls) -> List[str]:
        """All registered domain type identifiers."""
        return [domain for domain in cls._plugins]

    @classmethod
    def is_domain_available(cls, domain_type: str) -> bool:
        """Whether a plugin has been registered for *domain_type*."""
        return domain_type in cls._plugins

    @classmethod
    def get_domain_info(cls, domain_type: str) -> Optional[Dict]:
        """Describe a registered plugin (fields + weights), or None."""
        plugin = cls.get_plugin(domain_type)
        if plugin is None:
            return None

        info = {
            'domain_type': plugin.domain_type,
            'required_fields': plugin.get_required_fields(),
            'optional_fields': plugin.get_optional_fields(),
            'feature_weights': plugin.feature_weights
        }
        return info

    @classmethod
    def clear_cache(cls):
        """Drop cached singleton instances (useful for testing)."""
        cls._instances.clear()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# Auto-registration helper decorator
|
| 68 |
+
def register_plugin(domain_type: str):
    """Class decorator that registers the decorated plugin under *domain_type*."""
    def _wrap(plugin_cls):
        # Side effect only: record the class in the factory registry;
        # the class itself is returned unmodified.
        DomainPluginFactory.register(domain_type, plugin_cls)
        return plugin_cls
    return _wrap
|
services/domain_plugins/research_plugin.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Research/Science Domain Plugin
|
| 2 |
+
|
| 3 |
+
Scores research competency based on:
|
| 4 |
+
- Publication record (papers, citations)
|
| 5 |
+
- Lab experience and duration
|
| 6 |
+
- Research project depth
|
| 7 |
+
- Thesis/dissertation summaries
|
| 8 |
+
"""
|
| 9 |
+
import re
|
| 10 |
+
import time
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Dict, List
|
| 13 |
+
from .base_plugin import BaseDomainPlugin, DomainScore
|
| 14 |
+
from .plugin_factory import register_plugin
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@register_plugin('research')
class ResearchPlugin(BaseDomainPlugin):
    """Research/Science domain scoring plugin.

    Produces a 0-1 competency score from four lexical signals extracted
    from free-text evidence: publication record, lab experience,
    research-methodology depth, and thesis quality.
    """

    def __init__(self):
        super().__init__()
        # Cheap lexical indicators scanned for in the free-text evidence.
        self.publication_venues = [
            'journal', 'conference', 'proceedings', 'ieee', 'acm',
            'springer', 'elsevier', 'nature', 'science', 'arxiv'
        ]
        self.research_methods = [
            'experiment', 'methodology', 'hypothesis', 'literature review',
            'data collection', 'statistical analysis', 'simulation', 'survey'
        ]

    def _get_domain_type(self) -> str:
        return 'research'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Publications carry the largest weight; weights sum to 1.0.
        return {
            'publication_score': 0.35,
            'lab_experience_score': 0.25,
            'research_depth_score': 0.25,
            'thesis_quality_score': 0.15
        }

    def get_required_fields(self) -> List[str]:
        return ['research_description']

    def get_optional_fields(self) -> List[str]:
        return ['publications_text', 'lab_experience_text', 'thesis_summary']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate research domain score from the submitted evidence."""
        started = time.time()

        # One analyzer per evidence field; missing fields default to ''.
        features = {
            'publication_score': self._analyze_publications(
                evidence_data.get('publications_text', '')),
            'lab_experience_score': self._analyze_lab_experience(
                evidence_data.get('lab_experience_text', '')),
            'research_depth_score': self._analyze_research_depth(
                evidence_data.get('research_description', '')),
            'thesis_quality_score': self._analyze_thesis(
                evidence_data.get('thesis_summary', '')),
        }

        weighted = sum(value * self.feature_weights[name]
                       for name, value in features.items())
        confidence = self.calculate_confidence(evidence_data)
        elapsed_ms = (time.time() - started) * 1000

        return DomainScore(
            domain_type='research',
            score=min(weighted, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=elapsed_ms
        )

    def _analyze_publications(self, publications_text: str) -> float:
        """Score the publication record (0-1) from count and venue mentions."""
        if not publications_text or len(publications_text) < 30:
            return 0.0

        lowered = publications_text.lower()

        # Publication count: best match over common reference formats
        # (quoted titles, [n] citation markers, "2023. Title" year prefixes).
        reference_patterns = (r'"([^"]+)"', r'\[\d+\]', r'\d{4}\.\s')
        pub_count = max(len(re.findall(p, publications_text))
                        for p in reference_patterns)

        venue_count = sum(v in lowered for v in self.publication_venues)

        # 5+ publications cap the count part at 0.6; 3+ venues cap at 0.4.
        total = min(pub_count / 5, 0.6) + min(venue_count / 3, 0.4)

        logger.info(f"Publication score: {total:.2f} ({pub_count} pubs, {venue_count} venues)")
        return min(total, 1.0)

    def _analyze_lab_experience(self, lab_text: str) -> float:
        """Score lab experience (0-1) from stated duration and quality cues."""
        if not lab_text or len(lab_text) < 30:
            return 0.0

        lowered = lab_text.lower()

        # Longest stated duration, normalised to months.
        max_duration = 0
        for pattern, to_months in ((r'(\d+)\s*years?', 12), (r'(\d+)\s*months?', 1)):
            values = re.findall(pattern, lowered)
            if values:
                max_duration = max(max_duration,
                                   max(int(v) * to_months for v in values))

        total = min(max_duration / 12, 0.5)  # 12 months cap the duration part

        quality_keywords = ['research lab', 'professor', 'phd', 'equipment', 'experiment', 'protocol']
        quality_hits = sum(kw in lowered for kw in quality_keywords)
        total += min(quality_hits / 4, 0.5)

        logger.info(f"Lab experience: {total:.2f} ({max_duration} months)")
        return min(total, 1.0)

    def _analyze_research_depth(self, research_desc: str) -> float:
        """Score methodology sophistication (0-1) of the main description."""
        if not research_desc or len(research_desc) < 50:
            return 0.0

        lowered = research_desc.lower()

        method_hits = sum(m in lowered for m in self.research_methods)

        technical_terms = [
            'algorithm', 'model', 'framework', 'dataset', 'validation',
            'baseline', 'benchmark', 'evaluation', 'metrics', 'results'
        ]
        tech_hits = sum(t in lowered for t in technical_terms)

        # Methods cap at 0.5, technical terms at 0.3, length proxy at 0.2.
        total = (min(method_hits / 4, 0.5)
                 + min(tech_hits / 5, 0.3)
                 + min(len(research_desc) / 1000, 0.2))

        logger.info(f"Research depth: {total:.2f}")
        return min(total, 1.0)

    def _analyze_thesis(self, thesis_text: str) -> float:
        """Score thesis/dissertation quality (0-1) from structure and rigor."""
        if not thesis_text or len(thesis_text) < 100:
            return 0.0

        lowered = thesis_text.lower()

        structure_keywords = [
            'abstract', 'introduction', 'methodology', 'results',
            'discussion', 'conclusion', 'references', 'chapter'
        ]
        rigor_keywords = [
            'research question', 'objective', 'contribution', 'limitation',
            'future work', 'significance', 'novelty', 'finding'
        ]

        # Structure caps at 0.5, rigor at 0.3, length proxy at 0.2.
        total = (min(sum(kw in lowered for kw in structure_keywords) / 5, 0.5)
                 + min(sum(kw in lowered for kw in rigor_keywords) / 4, 0.3)
                 + min(len(thesis_text) / 2000, 0.2))

        logger.info(f"Thesis quality: {total:.2f}")
        return min(total, 1.0)
|
services/domain_plugins/tech_plugin.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tech/CS Domain Plugin
|
| 2 |
+
|
| 3 |
+
Scores technical competency based on:
|
| 4 |
+
- GitHub activity (commits, repos, stars, descriptions)
|
| 5 |
+
- LeetCode profile (problems solved, ranking)
|
| 6 |
+
- Portfolio links (project depth analysis)
|
| 7 |
+
"""
|
| 8 |
+
import re
|
| 9 |
+
import time
|
| 10 |
+
import logging
|
| 11 |
+
import requests
|
| 12 |
+
from typing import Dict, List
|
| 13 |
+
from .base_plugin import BaseDomainPlugin, DomainScore
|
| 14 |
+
from .plugin_factory import register_plugin
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@register_plugin('tech')
class TechPlugin(BaseDomainPlugin):
    """Technical domain scoring plugin.

    Scores technical competency from GitHub activity and repo quality,
    LeetCode presence, portfolio reachability, and recent commit activity.
    Network/API failures degrade to neutral fallback scores instead of raising.
    """

    def _get_domain_type(self) -> str:
        return 'tech'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Weights sum to 1.0; GitHub-derived signals dominate (0.60 combined).
        return {
            'github_activity_score': 0.30,
            'github_repo_quality': 0.20,
            'leetcode_score': 0.25,
            'portfolio_depth': 0.15,
            'recent_activity': 0.10
        }

    def get_required_fields(self) -> List[str]:
        return ['github_url']  # At least GitHub is required

    def get_optional_fields(self) -> List[str]:
        return ['leetcode_handle', 'portfolio_url', 'linkedin_url']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate tech domain score from the provided profile links."""
        start_time = time.time()
        features = {}

        # GitHub analysis (three features derived from one profile URL)
        github_url = evidence_data.get('github_url', '')
        if github_url:
            features['github_activity_score'] = self._analyze_github_activity(github_url)
            features['github_repo_quality'] = self._analyze_repo_quality(github_url)
            features['recent_activity'] = self._check_recent_commits(github_url)
        else:
            features['github_activity_score'] = 0.0
            features['github_repo_quality'] = 0.0
            features['recent_activity'] = 0.0

        # LeetCode analysis
        leetcode_handle = evidence_data.get('leetcode_handle', '')
        features['leetcode_score'] = self._analyze_leetcode(leetcode_handle) if leetcode_handle else 0.0

        # Portfolio analysis
        portfolio_url = evidence_data.get('portfolio_url', '')
        features['portfolio_depth'] = self._analyze_portfolio(portfolio_url) if portfolio_url else 0.0

        # Weighted aggregate, clamped to 1.0
        score = sum(features[k] * self.feature_weights[k] for k in features)
        confidence = self.calculate_confidence(evidence_data)
        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='tech',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_github_activity(self, github_url: str) -> float:
        """
        Analyze GitHub profile activity
        Returns: 0-1 score based on public repos, followers, engagement
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            # GitHub API endpoint (unauthenticated; subject to rate limits)
            api_url = f"https://api.github.com/users/{username}"
            headers = {'Accept': 'application/vnd.github.v3+json'}

            response = requests.get(api_url, headers=headers, timeout=5)

            if response.status_code != 200:
                logger.warning(f"GitHub API error for {username}: {response.status_code}")
                return 0.3  # Fallback score if API fails

            data = response.json()

            public_repos = data.get('public_repos', 0)
            followers = data.get('followers', 0)
            following = data.get('following', 0)

            # Simple scoring heuristic: each component is capped then weighted
            repo_score = min(public_repos / 20, 1.0) * 0.5    # 20+ repos = max
            follower_score = min(followers / 50, 1.0) * 0.3   # 50+ followers = max
            engagement_score = min((followers + following) / 100, 1.0) * 0.2

            total_score = repo_score + follower_score + engagement_score

            logger.info(f"GitHub activity for {username}: {total_score:.2f}")
            return total_score

        except Exception as e:
            logger.error(f"Error analyzing GitHub activity: {e}")
            return 0.3  # Fallback score

    def _analyze_repo_quality(self, github_url: str) -> float:
        """
        Analyze quality of the user's top repositories
        Returns: 0-1 score based on stars, forks, descriptions, docs

        Note: the GitHub "list user repos" endpoint only supports
        sort=created/updated/pushed/full_name (not stars), so repos are
        fetched and ranked by star count client-side.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            api_url = f"https://api.github.com/users/{username}/repos?per_page=100"
            headers = {'Accept': 'application/vnd.github.v3+json'}

            response = requests.get(api_url, headers=headers, timeout=5)

            if response.status_code != 200:
                return 0.3  # Fallback when API is unavailable/rate-limited

            # Rank by stars locally since the API cannot sort by stars.
            repos = sorted(response.json(),
                           key=lambda r: r.get('stargazers_count', 0),
                           reverse=True)

            if not repos:
                return 0.0

            # Analyze the five most-starred repos
            top = repos[:5]
            total_stars = sum(r.get('stargazers_count', 0) for r in top)
            total_forks = sum(r.get('forks_count', 0) for r in top)
            described = sum(1 for r in top if r.get('description'))
            # Proxy for documentation effort: wiki or GitHub Pages enabled
            # (the list endpoint does not expose README presence directly).
            documented = sum(1 for r in top if r.get('has_wiki') or r.get('has_pages'))

            star_score = min(total_stars / 50, 1.0) * 0.4   # 50+ stars = max
            fork_score = min(total_forks / 20, 1.0) * 0.2   # 20+ forks = max
            desc_score = (described / 5) * 0.2
            doc_score = (documented / 5) * 0.2

            total_score = star_score + fork_score + desc_score + doc_score

            logger.info(f"GitHub repo quality for {username}: {total_score:.2f}")
            return total_score

        except Exception as e:
            logger.error(f"Error analyzing repo quality: {e}")
            return 0.3

    def _check_recent_commits(self, github_url: str) -> float:
        """
        Check for recent activity (push events in the last 90 days)
        Returns: 0-1 score based on recency
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            # Get recent public events
            api_url = f"https://api.github.com/users/{username}/events/public?per_page=30"
            headers = {'Accept': 'application/vnd.github.v3+json'}

            response = requests.get(api_url, headers=headers, timeout=5)

            if response.status_code != 200:
                return 0.5  # Neutral fallback

            events = response.json()

            # GitHub timestamps are UTC; compare against an aware UTC cutoff.
            # (A naive local datetime.now() would skew the 90-day window by
            # the host's UTC offset.)
            from datetime import datetime, timedelta, timezone
            cutoff = datetime.now(timezone.utc) - timedelta(days=90)

            recent_commits = 0
            for event in events:
                if event.get('type') == 'PushEvent':
                    created_at = datetime.strptime(
                        event['created_at'], '%Y-%m-%dT%H:%M:%SZ'
                    ).replace(tzinfo=timezone.utc)
                    if created_at > cutoff:
                        recent_commits += 1

            # Score based on push frequency; 20+ in 90 days = max
            score = min(recent_commits / 20, 1.0)

            logger.info(f"Recent activity for {username}: {score:.2f} ({recent_commits} commits)")
            return score

        except Exception as e:
            logger.error(f"Error checking recent activity: {e}")
            return 0.5

    def _analyze_leetcode(self, leetcode_handle: str) -> float:
        """
        Analyze LeetCode profile
        Returns: 0-1 score based on problems solved and ranking

        Note: LeetCode doesn't have an official public API, so this uses heuristic fallback
        In production, consider using unofficial APIs or web scraping with proper rate limiting
        """
        try:
            # Placeholder: a provided handle earns a neutral score until a
            # real data source (unofficial API / scraping) is wired in.
            logger.info(f"LeetCode handle provided: {leetcode_handle}")
            return 0.5  # Neutral score when handle exists

        except Exception as e:
            logger.error(f"Error analyzing LeetCode: {e}")
            return 0.0

    def _analyze_portfolio(self, portfolio_url: str) -> float:
        """
        Analyze portfolio website
        Returns: 0-1 score based on reachability and hosting
        """
        try:
            # Basic URL normalisation
            if not portfolio_url.startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            # Check if URL is accessible (HEAD avoids downloading the page)
            response = requests.head(portfolio_url, timeout=5, allow_redirects=True)

            if response.status_code == 200:
                score = 0.7  # Portfolio exists and is accessible

                # Bonus for a custom domain (not github.io, netlify.app, etc.)
                if not any(host in portfolio_url for host in ['github.io', 'netlify.app', 'vercel.app', 'repl.it']):
                    score += 0.3

                logger.info(f"Portfolio accessible: {portfolio_url} (score: {score})")
                return min(score, 1.0)
            else:
                logger.warning(f"Portfolio not accessible: {portfolio_url}")
                return 0.2  # Some credit for providing URL

        except Exception as e:
            logger.error(f"Error analyzing portfolio: {e}")
            return 0.2

    def _extract_github_username(self, github_url: str) -> str:
        """Extract username from GitHub URL; '' when no match.

        Handles https://github.com/username and bare github.com/username.
        NOTE(review): also matches non-user paths like github.com/orgs —
        acceptable here since callers pass profile URLs.
        """
        pattern = r'github\.com/([a-zA-Z0-9_-]+)'
        match = re.search(pattern, github_url)
        return match.group(1) if match else ''
|
services/fidelity_transformer.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fidelity Criteria Transformer - Enhanced aspect extraction with semantic validation
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import logging
|
| 6 |
+
import numpy as np
|
| 7 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
# Try importing sentence transformers
|
| 13 |
+
try:
|
| 14 |
+
from sentence_transformers import SentenceTransformer
|
| 15 |
+
HAS_SBERT = True
|
| 16 |
+
except ImportError:
|
| 17 |
+
HAS_SBERT = False
|
| 18 |
+
logger.warning("sentence-transformers not installed, using fallback")
|
| 19 |
+
|
| 20 |
+
# Try importing domain knowledge base
|
| 21 |
+
try:
|
| 22 |
+
from .domain_knowledge_base import get_domain_knowledge_base, DomainKnowledgeBase
|
| 23 |
+
HAS_DKB = True
|
| 24 |
+
except ImportError:
|
| 25 |
+
HAS_DKB = False
|
| 26 |
+
logger.warning("DomainKnowledgeBase not available")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class FidelityScore:
    """Fidelity assessment result"""
    score: float      # 0-1 overall fidelity
    coherence: float  # Semantic coherence
    coverage: float   # Aspect coverage
    depth: float      # Content depth
    issues: List[str]  # Human-readable list of detected issues


@dataclass
class AspectExtractionResult:
    """Result of aspect extraction for a text"""
    aspects: Dict[str, float]             # aspect -> score
    chunk_evidence: Dict[str, List[str]]  # aspect -> supporting chunks
    fidelity: FidelityScore
    detected_domain: str
    domain_confidence: float


class FidelityScorer:
    """
    Semantic fidelity scoring for text responses
    Validates response quality against expected patterns
    """

    # Generic/copy-paste patterns to detect
    GENERIC_PATTERNS = [
        "i am a hard worker",
        "i have good communication skills",
        "i am a team player",
        "i want to learn and grow",
        "i am passionate about",
        "looking for opportunities",
        "seeking challenging role"
    ]

    # Word-count thresholds driving the depth ramp
    MIN_WORD_COUNT = 30
    IDEAL_WORD_COUNT = 150
    MAX_WORD_COUNT = 500

    def __init__(self):
        # Pre-lowercase once so matching in score() is a plain substring test.
        self.generic_patterns = [pattern.lower() for pattern in self.GENERIC_PATTERNS]

    def score(self, text: str, aspect_scores: Dict[str, float],
              expected_aspects: List[str] = None) -> FidelityScore:
        """
        Compute fidelity score for a text response

        Args:
            text: The text to evaluate
            aspect_scores: Scores from aspect extraction
            expected_aspects: Aspects expected in this response

        Returns:
            FidelityScore with detailed breakdown
        """
        # Degenerate input short-circuits with an all-zero score.
        if not text or len(text.strip()) < 10:
            return FidelityScore(
                score=0.0, coherence=0.0, coverage=0.0, depth=0.0,
                issues=["Response is too short or empty"]
            )

        issues: List[str] = []
        lowered = text.lower()
        n_words = len(text.split())

        # 1. Depth: piecewise ramp on word count.
        if n_words < self.MIN_WORD_COUNT:
            depth = 0.2
            issues.append(f"Response too short ({n_words} words, minimum {self.MIN_WORD_COUNT})")
        elif n_words < self.IDEAL_WORD_COUNT:
            depth = 0.5 + 0.3 * (n_words - self.MIN_WORD_COUNT) / (self.IDEAL_WORD_COUNT - self.MIN_WORD_COUNT)
        elif n_words <= self.MAX_WORD_COUNT:
            depth = 1.0
        else:
            depth = 0.9  # Slightly penalize overly long responses
            issues.append("Response is longer than recommended")

        # 2. Canned-phrase detection; each hit shaves coherence, capped at 0.4.
        n_generic = sum(phrase in lowered for phrase in self.generic_patterns)
        penalty = min(n_generic * 0.1, 0.4)
        if n_generic > 2:
            issues.append(f"Contains {n_generic} generic phrases")

        # 3. Coherence from sentence structure and vocabulary diversity.
        sents = [part.strip() for part in text.split('.') if len(part.strip()) > 10]
        if len(sents) < 2:
            coherence = 0.4  # generic-phrase penalty intentionally skipped here
            issues.append("Response lacks proper sentence structure")
        else:
            tokens = lowered.split()
            diversity = len(set(tokens)) / max(len(tokens), 1)
            coherence = max(0.2, 0.6 + 0.2 * diversity - penalty)

        # 4. Coverage of expected aspects (best-aspect proxy when none given).
        if expected_aspects and aspect_scores:
            covered = sum(1 for aspect in expected_aspects
                          if aspect_scores.get(aspect, 0) > 0.4)
            coverage = covered / len(expected_aspects)
            if coverage < 0.5:
                issues.append(f"Only {covered}/{len(expected_aspects)} expected aspects covered")
        elif aspect_scores:
            # Having any single strong aspect is treated as good coverage.
            coverage = min(max(aspect_scores.values()) + 0.2, 1.0)
        else:
            coverage = 0.3

        # 5. Weighted blend of the three components.
        overall = 0.30 * depth + 0.35 * coherence + 0.35 * coverage

        return FidelityScore(
            score=round(overall, 3),
            coherence=round(coherence, 3),
            coverage=round(coverage, 3),
            depth=round(depth, 3),
            issues=issues
        )
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
class FidelityCriteriaTransformer:
|
| 168 |
+
"""
|
| 169 |
+
Enhanced aspect extraction with domain-aware prototypes and fidelity validation
|
| 170 |
+
|
| 171 |
+
Key improvements over TextModuleV2:
|
| 172 |
+
1. Domain-specific aspect prototypes from DomainKnowledgeBase
|
| 173 |
+
2. Fidelity scoring for response quality validation
|
| 174 |
+
3. Unified extraction interface with rich output
|
| 175 |
+
"""
|
| 176 |
+
|
| 177 |
+
def __init__(self,
|
| 178 |
+
model_name: str = None,
|
| 179 |
+
domains_dir: str = None,
|
| 180 |
+
use_gpu: bool = False):
|
| 181 |
+
"""
|
| 182 |
+
Initialize FCT
|
| 183 |
+
|
| 184 |
+
Args:
|
| 185 |
+
model_name: Sentence transformer model (default: all-mpnet-base-v2)
|
| 186 |
+
domains_dir: Path to domain config directory
|
| 187 |
+
use_gpu: Whether to use GPU for encoding
|
| 188 |
+
"""
|
| 189 |
+
self.model_name = model_name or os.getenv('FCT_MODEL_NAME', 'all-mpnet-base-v2')
|
| 190 |
+
self.device = 'cuda' if use_gpu else 'cpu'
|
| 191 |
+
|
| 192 |
+
# Initialize encoder
|
| 193 |
+
if HAS_SBERT:
|
| 194 |
+
logger.info(f"Loading FCT model: {self.model_name}")
|
| 195 |
+
self.encoder = SentenceTransformer(self.model_name, device=self.device)
|
| 196 |
+
else:
|
| 197 |
+
self.encoder = None
|
| 198 |
+
logger.warning("Running in fallback mode without sentence transformers")
|
| 199 |
+
|
| 200 |
+
# Initialize domain knowledge base
|
| 201 |
+
if HAS_DKB:
|
| 202 |
+
self.dkb = get_domain_knowledge_base(domains_dir)
|
| 203 |
+
else:
|
| 204 |
+
self.dkb = None
|
| 205 |
+
|
| 206 |
+
# Initialize fidelity scorer
|
| 207 |
+
self.fidelity_scorer = FidelityScorer()
|
| 208 |
+
|
| 209 |
+
# Cache for centroids (domain -> aspect -> centroid)
|
| 210 |
+
self._centroid_cache: Dict[str, Dict[str, np.ndarray]] = {}
|
| 211 |
+
|
| 212 |
+
# Default aspects (fallback when no domain detected)
|
| 213 |
+
self.default_aspects = [
|
| 214 |
+
'technical_skills', 'problem_solving', 'leadership',
|
| 215 |
+
'communication', 'teamwork', 'initiative', 'learning_agility'
|
| 216 |
+
]
|
| 217 |
+
|
| 218 |
+
logger.info("FidelityCriteriaTransformer initialized")
|
| 219 |
+
|
| 220 |
+
def _get_centroids(self, domain_id: str,
|
| 221 |
+
aspect_seeds: Dict[str, List[str]]) -> Dict[str, np.ndarray]:
|
| 222 |
+
"""Get or compute centroids for aspects"""
|
| 223 |
+
cache_key = domain_id
|
| 224 |
+
|
| 225 |
+
if cache_key in self._centroid_cache:
|
| 226 |
+
return self._centroid_cache[cache_key]
|
| 227 |
+
|
| 228 |
+
if not self.encoder:
|
| 229 |
+
return {}
|
| 230 |
+
|
| 231 |
+
centroids = {}
|
| 232 |
+
for aspect, seeds in aspect_seeds.items():
|
| 233 |
+
if not seeds:
|
| 234 |
+
continue
|
| 235 |
+
|
| 236 |
+
# Encode seeds
|
| 237 |
+
embeddings = self.encoder.encode(seeds, convert_to_tensor=False,
|
| 238 |
+
show_progress_bar=False)
|
| 239 |
+
embeddings = np.array(embeddings, dtype=np.float32)
|
| 240 |
+
|
| 241 |
+
# Compute normalized centroid
|
| 242 |
+
centroid = np.mean(embeddings, axis=0)
|
| 243 |
+
centroid = centroid / (np.linalg.norm(centroid) + 1e-8)
|
| 244 |
+
centroids[aspect] = centroid
|
| 245 |
+
|
| 246 |
+
self._centroid_cache[cache_key] = centroids
|
| 247 |
+
return centroids
|
| 248 |
+
|
| 249 |
+
def _split_text(self, text: str, max_chunks: int = 20) -> List[str]:
|
| 250 |
+
"""Split text into chunks for scoring"""
|
| 251 |
+
import re
|
| 252 |
+
|
| 253 |
+
# Split by sentences
|
| 254 |
+
sentences = re.split(r'[.!?]+', text)
|
| 255 |
+
chunks = [s.strip() for s in sentences if len(s.strip()) > 20]
|
| 256 |
+
|
| 257 |
+
# If too few, use sliding window
|
| 258 |
+
if len(chunks) < 3:
|
| 259 |
+
words = text.split()
|
| 260 |
+
window_size = 50
|
| 261 |
+
step = 25
|
| 262 |
+
chunks = []
|
| 263 |
+
for i in range(0, max(1, len(words) - window_size + 1), step):
|
| 264 |
+
chunk = ' '.join(words[i:i+window_size])
|
| 265 |
+
if len(chunk) > 20:
|
| 266 |
+
chunks.append(chunk)
|
| 267 |
+
|
| 268 |
+
return chunks[:max_chunks]
|
| 269 |
+
|
| 270 |
+
    def extract_aspects(self,
                        text: str,
                        domain_hint: str = None,
                        skills: List[str] = None,
                        expected_aspects: List[str] = None) -> AspectExtractionResult:
        """
        Extract aspects from text with fidelity validation.

        Pipeline: detect domain -> fetch aspect seed phrases -> embed seeds
        into centroids -> score text chunks by cosine similarity against each
        centroid -> run fidelity scoring over the result.

        Args:
            text: Text to analyze
            domain_hint: Optional domain ID to use (bypasses detection if known)
            skills: Optional list of skills for domain detection
            expected_aspects: Optional list of expected aspects (passed to
                the fidelity scorer)

        Returns:
            AspectExtractionResult with scores, evidence, and fidelity
        """
        # 1. Domain Detection — an explicit, known hint wins with full
        # confidence; otherwise defer to the knowledge base; without a KB we
        # fall back to 'general' with zero confidence.
        if domain_hint and self.dkb and domain_hint in self.dkb.list_domains():
            detected_domain = domain_hint
            domain_confidence = 1.0
        elif self.dkb:
            detected_domain, domain_confidence = self.dkb.detect_domain(text, skills)
        else:
            detected_domain = 'general'
            domain_confidence = 0.0

        # 2. Get aspect prototypes (seed phrases) for the detected domain.
        if self.dkb and detected_domain != 'general':
            aspect_seeds = self.dkb.get_aspect_prototypes_for_domain(detected_domain)
        else:
            aspect_seeds = {}

        # 3. Fallback to default aspects if needed
        if not aspect_seeds:
            # Use minimal default seeds
            aspect_seeds = {
                'technical_skills': ['developed software', 'built systems', 'coded in python'],
                'problem_solving': ['solved problems', 'debugged issues', 'optimized performance'],
                'leadership': ['led team', 'managed project', 'organized event'],
                'communication': ['presented to', 'wrote documentation', 'explained to']
            }

        # 4. Get/compute centroids (cached per domain; empty if no encoder).
        centroids = self._get_centroids(detected_domain, aspect_seeds)

        # 5. Score text against aspects
        aspect_scores = {}
        chunk_evidence = {aspect: [] for aspect in centroids.keys()}

        # Empty/too-short text or no encoder: still run fidelity scoring so
        # the caller gets a graded "empty" result rather than an exception.
        if not text or len(text) < 20 or not self.encoder:
            # Return empty result
            fidelity = self.fidelity_scorer.score(text, {}, expected_aspects)
            return AspectExtractionResult(
                aspects={},
                chunk_evidence={},
                fidelity=fidelity,
                detected_domain=detected_domain,
                domain_confidence=domain_confidence
            )

        # Split and encode text
        chunks = self._split_text(text)
        if not chunks:
            fidelity = self.fidelity_scorer.score(text, {}, expected_aspects)
            return AspectExtractionResult(
                aspects={},
                chunk_evidence={},
                fidelity=fidelity,
                detected_domain=detected_domain,
                domain_confidence=domain_confidence
            )

        chunk_embeddings = self.encoder.encode(chunks, convert_to_tensor=False,
                                               show_progress_bar=False)
        chunk_embeddings = np.array(chunk_embeddings, dtype=np.float32)

        # Score each aspect
        for aspect, centroid in centroids.items():
            # Cosine similarities between every chunk and this centroid
            # (epsilon guards against zero-norm vectors).
            sims = np.dot(chunk_embeddings, centroid) / (
                np.linalg.norm(chunk_embeddings, axis=1) * np.linalg.norm(centroid) + 1e-8
            )

            # Scoring: weighted max + mean of top-k — the max rewards one
            # strong match, the top-k mean rewards repeated evidence.
            max_sim = float(np.max(sims))
            top_k = 3
            topk_sims = np.partition(sims, -min(top_k, len(sims)))[-top_k:]
            mean_topk = float(np.mean(topk_sims))

            # Normalize to 0-1 (cosine lies in [-1, 1], so shift and halve).
            raw_score = 0.6 * max_sim + 0.4 * mean_topk
            normalized = (raw_score + 1) / 2
            aspect_scores[aspect] = float(np.clip(normalized, 0, 1))

            # Collect evidence chunks whose raw cosine clears the threshold.
            threshold = 0.35
            for i, sim in enumerate(sims):
                if sim > threshold:
                    chunk_evidence[aspect].append(chunks[i])

        # 6. Fidelity scoring over the full aspect-score map.
        fidelity = self.fidelity_scorer.score(text, aspect_scores, expected_aspects)

        return AspectExtractionResult(
            aspects=aspect_scores,
            chunk_evidence=chunk_evidence,
            fidelity=fidelity,
            detected_domain=detected_domain,
            domain_confidence=domain_confidence
        )
|
| 381 |
+
|
| 382 |
+
    def score_student_text(self, text_responses: Dict[str, str],
                           domain_hint: str = None,
                           skills: List[str] = None) -> Dict[str, Any]:
        """
        Score all text responses for a student.

        Runs extract_aspects independently on each of the three text
        questions, aggregates aspect scores by taking the max across
        questions, and combines them into a weighted overall text score.

        Args:
            text_responses: Dict with text_q1, text_q2, text_q3
            domain_hint: Optional domain ID
            skills: Optional skills list for domain detection

        Returns:
            Comprehensive scoring result (score, confidence, domain info,
            per-aspect scores, per-question fidelity, collected issues)
        """
        text_q1 = text_responses.get('text_q1', '')
        text_q2 = text_responses.get('text_q2', '')
        text_q3 = text_responses.get('text_q3', '')

        # Combined text for domain detection
        # NOTE(review): combined_text is currently unused below — each
        # question re-detects the domain from its own text via
        # extract_aspects; confirm whether combined detection was intended.
        combined_text = f"{text_q1} {text_q2} {text_q3}"

        # Expected aspects per question (passed through to fidelity scoring).
        q1_aspects = ['technical_skills', 'problem_solving', 'learning_agility']
        q2_aspects = ['career_alignment', 'initiative', 'learning_agility']
        q3_aspects = ['leadership', 'teamwork', 'communication']

        # Extract aspects per question
        q1_result = self.extract_aspects(text_q1, domain_hint, skills, q1_aspects)
        q2_result = self.extract_aspects(text_q2, domain_hint, skills, q2_aspects)
        q3_result = self.extract_aspects(text_q3, domain_hint, skills, q3_aspects)

        # Aggregate scores: take the maximum per aspect, so strong evidence
        # in any single answer counts fully for that aspect.
        all_aspects = {}
        for result in [q1_result, q2_result, q3_result]:
            for aspect, score in result.aspects.items():
                if aspect in all_aspects:
                    all_aspects[aspect] = max(all_aspects[aspect], score)
                else:
                    all_aspects[aspect] = score

        # Overall metrics: fidelity averaged equally across the 3 questions.
        avg_fidelity = np.mean([
            q1_result.fidelity.score,
            q2_result.fidelity.score,
            q3_result.fidelity.score
        ])

        # Compute weighted text score. Weights sum to 1.0; aspects absent
        # from all_aspects contribute a neutral 0.3 baseline, not zero.
        weights = {
            'technical_skills': 0.15,
            'problem_solving': 0.10,
            'leadership': 0.20,
            'communication': 0.15,
            'teamwork': 0.10,
            'learning_agility': 0.10,
            'initiative': 0.10,
            'career_alignment': 0.10
        }

        weighted_score = sum(
            all_aspects.get(aspect, 0.3) * weight
            for aspect, weight in weights.items()
        )

        # Confidence is driven entirely by fidelity of the responses.
        confidence = avg_fidelity

        return {
            'score': round(weighted_score, 3),
            'confidence': round(confidence, 3),
            # Domain info reported from q1's analysis only.
            'detected_domain': q1_result.detected_domain,
            'domain_confidence': round(q1_result.domain_confidence, 3),
            'aspects': {k: round(v, 3) for k, v in all_aspects.items()},
            'fidelity': {
                'overall': round(avg_fidelity, 3),
                'q1': round(q1_result.fidelity.score, 3),
                'q2': round(q2_result.fidelity.score, 3),
                'q3': round(q3_result.fidelity.score, 3)
            },
            'issues': (
                q1_result.fidelity.issues +
                q2_result.fidelity.issues +
                q3_result.fidelity.issues
            )
        }
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
# Singleton
# Process-wide shared FCT instance; created lazily on first access.
_fct_instance: Optional[FidelityCriteriaTransformer] = None


def get_fidelity_transformer(model_name: str = None,
                             domains_dir: str = None) -> FidelityCriteriaTransformer:
    """Get or create singleton FCT instance.

    NOTE(review): model_name/domains_dir only take effect on the FIRST call;
    subsequent calls return the existing instance and silently ignore these
    arguments. Not thread-safe during first initialization.
    """
    global _fct_instance

    if _fct_instance is None:
        _fct_instance = FidelityCriteriaTransformer(model_name, domains_dir)

    return _fct_instance
|
services/fusion.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Fusion Engine - Confidence-weighted Score Fusion"""
|
| 2 |
+
from typing import Dict, Tuple, Optional
|
| 3 |
+
from config import Config
|
| 4 |
+
|
| 5 |
+
class FusionEngine:
    """Combines scores from all modules with confidence weighting.

    Each component score is weighted by (base weight * component confidence)
    and the result is normalized by the sum of effective weights, so missing
    or low-confidence components do not drag the final score down.
    """

    def __init__(self):
        # Base weights (used when no domain evidence is supplied).
        self.base_weights = {
            'universal': Config.UNIVERSAL_WEIGHT,
            'personality': Config.PERSONALITY_WEIGHT,
            'text': Config.TEXT_WEIGHT
        }

        # Extended weights (used when domain evidence exists).
        self.extended_weights = {
            'universal': 0.30,   # Reduced from base
            'personality': 0.25,
            'text': 0.25,
            'domain': 0.20       # New domain component
        }

    def fuse_scores(
        self,
        universal_score: float,
        universal_confidence: float,
        personality_score: float,
        personality_confidence: float,
        text_score: float,
        text_confidence: float,
        domain_score: Optional[float] = None,
        domain_confidence: Optional[float] = None
    ) -> Tuple[float, Dict]:
        """
        Fuse scores with confidence weighting.
        Supports optional domain score for pluggable domain evidence.

        Returns:
            (final_score, breakdown) — final_score is the unrounded fused
            value; breakdown carries rounded components, confidences, and
            the normalized effective weights.
        """
        # Domain evidence counts only when both value and a positive
        # confidence are supplied.
        has_domain = (
            domain_score is not None
            and domain_confidence is not None
            and domain_confidence > 0
        )
        weights = self.extended_weights if has_domain else self.base_weights

        # Effective weight = configured weight scaled by module confidence.
        effective_weights = {
            'universal': weights['universal'] * universal_confidence,
            'personality': weights['personality'] * personality_confidence,
            'text': weights['text'] * text_confidence
        }
        if has_domain:
            effective_weights['domain'] = weights['domain'] * domain_confidence

        total_effective_weight = sum(effective_weights.values())

        # Degenerate case: every confidence (or weight) is zero — nothing to
        # normalize by, so report an all-zero breakdown.
        if total_effective_weight == 0:
            breakdown = {
                'final_score': 0.0,
                'component_scores': {
                    'universal': 0.0,
                    'personality': 0.0,
                    'text': 0.0
                },
                'confidences': {
                    'universal': 0.0,
                    'personality': 0.0,
                    'text': 0.0
                },
                'effective_weights': effective_weights,
                # BUGFIX: previously hard-coded to False even when domain
                # keys were added to the breakdown below.
                'has_domain': has_domain
            }
            if has_domain:
                breakdown['component_scores']['domain'] = 0.0
                breakdown['confidences']['domain'] = 0.0
            return 0.0, breakdown

        # Weighted sum, normalized by the total effective weight.
        fused_score = (
            effective_weights['universal'] * universal_score +
            effective_weights['personality'] * personality_score +
            effective_weights['text'] * text_score
        )
        if has_domain:
            fused_score += effective_weights['domain'] * domain_score
        fused_score /= total_effective_weight

        breakdown = {
            'final_score': round(fused_score, 4),
            'component_scores': {
                'universal': round(universal_score, 4),
                'personality': round(personality_score, 4),
                'text': round(text_score, 4)
            },
            'confidences': {
                'universal': round(universal_confidence, 4),
                'personality': round(personality_confidence, 4),
                'text': round(text_confidence, 4)
            },
            # Normalized so the reported effective weights sum to ~1.0.
            'effective_weights': {
                k: round(v / total_effective_weight, 4)
                for k, v in effective_weights.items()
            },
            'base_weights': weights,
            'has_domain': has_domain
        }
        if has_domain:
            breakdown['component_scores']['domain'] = round(domain_score, 4)
            breakdown['confidences']['domain'] = round(domain_confidence, 4)

        return fused_score, breakdown

    def get_grade(self, final_score: float) -> str:
        """Convert a 0-1 score to a letter grade (A+ down to D)."""
        if final_score >= 0.9:
            return 'A+'
        elif final_score >= 0.85:
            return 'A'
        elif final_score >= 0.8:
            return 'A-'
        elif final_score >= 0.75:
            return 'B+'
        elif final_score >= 0.7:
            return 'B'
        elif final_score >= 0.65:
            return 'B-'
        elif final_score >= 0.6:
            return 'C+'
        elif final_score >= 0.55:
            return 'C'
        elif final_score >= 0.5:
            return 'C-'
        else:
            return 'D'

    def get_percentile(self, final_score: float) -> int:
        """Estimate percentile (mock for MVP), capped at 99."""
        # In production, this would query the actual score distribution.
        return min(int(final_score * 100), 99)
|
services/personality_module.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Personality Module - Big Five Trait Scoring"""
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import Dict, Tuple
|
| 4 |
+
|
| 5 |
+
class PersonalityModule:
    """Scores personality based on Big Five traits.

    Twenty Likert questions (p_q1..p_q20, values 1-5) map to five traits;
    one question per trait is reverse-scored. Trait scores are normalized
    to 0-1 and combined with employability-oriented weights.
    """

    def __init__(self):
        # Map questions to traits (reversed questions need inverse scoring).
        self.trait_mapping = {
            'openness': ['p_q1', 'p_q3', 'p_q4'],
            'openness_r': ['p_q2'],  # Reversed
            'conscientiousness': ['p_q5', 'p_q7', 'p_q8'],
            'conscientiousness_r': ['p_q6'],  # Reversed
            'extraversion': ['p_q9', 'p_q11', 'p_q12'],
            'extraversion_r': ['p_q10'],  # Reversed
            'agreeableness': ['p_q13', 'p_q15', 'p_q16'],
            'agreeableness_r': ['p_q14'],  # Reversed
            'stability': ['p_q17', 'p_q19', 'p_q20'],
            'stability_r': ['p_q18']  # Reversed
        }

        # Weights for employability (some traits matter more); sum to 1.0.
        self.trait_weights = {
            'openness': 0.20,
            'conscientiousness': 0.30,
            'extraversion': 0.20,
            'agreeableness': 0.15,
            'stability': 0.15
        }

    def score(self, responses: Dict[str, int]) -> Tuple[float, float, Dict]:
        """
        Calculate personality score from the 20 Likert questions.

        Args:
            responses: Mapping of question id ('p_q1'..'p_q20') to a 1-5
                rating. Unknown keys are ignored; missing questions fall
                back to a neutral 0.5 trait score.

        Returns:
            (score, confidence, trait_scores) — score and each trait score
            are in [0, 1]; confidence is the fraction of the 20 mapped
            questions that were answered.
        """
        trait_scores = {}

        # Calculate each trait score
        for trait in ['openness', 'conscientiousness', 'extraversion', 'agreeableness', 'stability']:
            normal_qs = self.trait_mapping[trait]
            reversed_qs = self.trait_mapping[f'{trait}_r']

            scores = []

            # Normal questions: higher rating = better (1-5 -> 0-1).
            for q in normal_qs:
                if q in responses:
                    scores.append((responses[q] - 1) / 4.0)

            # Reversed questions: lower rating = better (invert, then 0-1).
            for q in reversed_qs:
                if q in responses:
                    scores.append((5 - responses[q]) / 4.0)

            if scores:
                trait_scores[trait] = float(np.mean(scores))
            else:
                trait_scores[trait] = 0.5  # Neutral if missing

        # Calculate overall personality score (weighted sum of traits).
        personality_score = float(sum(
            trait_scores[trait] * self.trait_weights[trait]
            for trait in trait_scores.keys()
        ))

        # Confidence based on question completion.
        # BUGFIX: count only questions that belong to the mapping — the old
        # len(responses)/20 counted unrelated keys and could exceed 1.0.
        known_questions = {q for qs in self.trait_mapping.values() for q in qs}
        answered_questions = len(known_questions & set(responses))
        confidence = answered_questions / len(known_questions)

        return personality_score, confidence, trait_scores

    def explain(self, trait_scores: Dict) -> Dict:
        """Generate a strengths/growth-areas explanation for trait scores.

        Strengths: top-2 traits scoring above 0.6.
        Growth areas: bottom-2 traits scoring below 0.5.
        """
        explanations = {
            'top_strengths': [],
            'areas_for_growth': []
        }

        # Sort traits by score (descending).
        sorted_traits = sorted(trait_scores.items(), key=lambda x: x[1], reverse=True)

        # Top 2 strengths
        for trait, score in sorted_traits[:2]:
            if score > 0.6:
                explanations['top_strengths'].append({
                    'trait': trait.capitalize(),
                    'score': round(score, 2),
                    'description': self._get_trait_description(trait, score)
                })

        # Bottom 2 areas for growth
        for trait, score in sorted_traits[-2:]:
            if score < 0.5:
                explanations['areas_for_growth'].append({
                    'trait': trait.capitalize(),
                    'score': round(score, 2),
                    'description': self._get_trait_description(trait, score)
                })

        return explanations

    def _get_trait_description(self, trait: str, score: float) -> str:
        """Return a short high/low description for a trait score."""
        descriptions = {
            'openness': {
                'high': "Highly creative, curious, and open to new experiences",
                'low': "Prefers routine and traditional approaches"
            },
            'conscientiousness': {
                'high': "Very organized, reliable, and goal-oriented",
                'low': "May benefit from improved organization and planning"
            },
            'extraversion': {
                'high': "Energetic, sociable, and thrives in team environments",
                'low': "Prefers independent work and smaller groups"
            },
            'agreeableness': {
                'high': "Cooperative, empathetic, and team-oriented",
                'low': "Independent thinker, comfortable with competition"
            },
            'stability': {
                'high': "Emotionally stable, handles stress well",
                'low': "May experience stress in high-pressure situations"
            }
        }

        # 0.6 is the high/low cut-off, matching the strengths threshold.
        level = 'high' if score > 0.6 else 'low'
        return descriptions.get(trait, {}).get(level, f"{trait} score: {score:.2f}")
|
services/student_output.py
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Student Output Service - Individual student JSON formatting
|
| 3 |
+
Provides structured analysis output for single students
|
| 4 |
+
"""
|
| 5 |
+
import logging
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass
class SkillRecommendation:
    """Skill gap recommendation for student"""
    # Name of the skill the student should acquire or improve.
    skill: str
    # Priority label for addressing the gap (free-form string).
    priority: str
    # Courses suggested to close the gap.
    recommended_courses: List[str]
    # Certifications relevant to the skill.
    certifications: List[str]
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@dataclass
class CareerPath:
    """Suggested career path"""
    # Role identifier/title being suggested.
    role: str
    # How well the student fits the role (0-1).
    fit_score: float
    # Role requirements the student already satisfies.
    requirements_met: List[str]
    # Role requirements the student is missing.
    requirements_gap: List[str]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class StudentOutputService:
    """
    Formats individual student analysis into structured JSON
    """

    # Grade thresholds
    # Ordered descending; _get_grade returns the first (grade, description)
    # whose threshold the score meets.
    GRADE_THRESHOLDS = [
        (0.90, 'A+', 'Outstanding'),
        (0.80, 'A', 'Excellent'),
        (0.70, 'B+', 'Very Good'),
        (0.60, 'B', 'Good'),
        (0.50, 'C', 'Average'),
        (0.40, 'D', 'Below Average'),
        (0.00, 'F', 'Needs Improvement')
    ]

    def __init__(self):
        # Role requirements mapping: role id -> required/preferred skill
        # lists, consumed by the career-suggestion / skill-recommendation
        # helpers.
        self.career_requirements = {
            'software_engineer': {
                'required': ['python', 'sql', 'git', 'problem_solving'],
                'preferred': ['cloud', 'docker', 'system_design']
            },
            'data_scientist': {
                'required': ['python', 'sql', 'statistics', 'machine_learning'],
                'preferred': ['deep_learning', 'spark', 'mlops']
            },
            'product_manager': {
                'required': ['communication', 'leadership', 'analytics'],
                'preferred': ['sql', 'strategic_thinking', 'stakeholder_management']
            },
            'mechanical_engineer': {
                'required': ['cad', 'engineering_drawing', 'manufacturing'],
                'preferred': ['fea', 'cfd', 'automation']
            }
        }
|
| 67 |
+
|
| 68 |
+
def format_student_output(self,
|
| 69 |
+
student_id: str,
|
| 70 |
+
score_packet: Dict[str, Any],
|
| 71 |
+
domain_analysis: Dict[str, Any] = None,
|
| 72 |
+
raw_data: Dict[str, Any] = None) -> Dict[str, Any]:
|
| 73 |
+
"""
|
| 74 |
+
Format comprehensive student analysis JSON
|
| 75 |
+
|
| 76 |
+
Args:
|
| 77 |
+
student_id: Student identifier
|
| 78 |
+
score_packet: Output from scoring endpoint
|
| 79 |
+
domain_analysis: Output from FCT (optional)
|
| 80 |
+
raw_data: Original student data (optional)
|
| 81 |
+
|
| 82 |
+
Returns:
|
| 83 |
+
Structured student JSON
|
| 84 |
+
"""
|
| 85 |
+
# Extract core scores
|
| 86 |
+
final_score = score_packet.get('final_score', 0)
|
| 87 |
+
grade, grade_desc = self._get_grade(final_score)
|
| 88 |
+
|
| 89 |
+
# Component scores
|
| 90 |
+
component_scores = score_packet.get('scores', {}).get('component_scores', {})
|
| 91 |
+
confidences = score_packet.get('scores', {}).get('confidences', {})
|
| 92 |
+
detailed_features = score_packet.get('detailed_features', {})
|
| 93 |
+
|
| 94 |
+
# Domain info
|
| 95 |
+
detected_domain = (
|
| 96 |
+
domain_analysis.get('detected_domain') if domain_analysis
|
| 97 |
+
else score_packet.get('domain_type', 'general')
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
# Build output
|
| 101 |
+
output = {
|
| 102 |
+
'student_id': student_id,
|
| 103 |
+
'generated_at': datetime.utcnow().isoformat() + 'Z',
|
| 104 |
+
|
| 105 |
+
'summary': {
|
| 106 |
+
'final_score': round(final_score, 3),
|
| 107 |
+
'grade': grade,
|
| 108 |
+
'grade_description': grade_desc,
|
| 109 |
+
'percentile': score_packet.get('percentile', 50),
|
| 110 |
+
'placement_ready': final_score >= 0.60
|
| 111 |
+
},
|
| 112 |
+
|
| 113 |
+
'scores': {
|
| 114 |
+
'universal': {
|
| 115 |
+
'score': round(component_scores.get('universal', 0), 3),
|
| 116 |
+
'confidence': round(confidences.get('universal', 0), 3),
|
| 117 |
+
'features': detailed_features.get('universal', {})
|
| 118 |
+
},
|
| 119 |
+
'personality': {
|
| 120 |
+
'score': round(component_scores.get('personality', 0), 3),
|
| 121 |
+
'confidence': round(confidences.get('personality', 0), 3),
|
| 122 |
+
'traits': detailed_features.get('personality', {})
|
| 123 |
+
},
|
| 124 |
+
'text': {
|
| 125 |
+
'score': round(component_scores.get('text', 0), 3),
|
| 126 |
+
'confidence': round(confidences.get('text', 0), 3),
|
| 127 |
+
'aspects': detailed_features.get('text', {})
|
| 128 |
+
}
|
| 129 |
+
},
|
| 130 |
+
|
| 131 |
+
'domain_analysis': self._format_domain_analysis(
|
| 132 |
+
detected_domain, domain_analysis, raw_data
|
| 133 |
+
),
|
| 134 |
+
|
| 135 |
+
'strengths': self._identify_strengths(detailed_features),
|
| 136 |
+
|
| 137 |
+
'improvement_areas': self._identify_improvements(detailed_features),
|
| 138 |
+
|
| 139 |
+
'career_suggestions': self._suggest_careers(
|
| 140 |
+
detected_domain, detailed_features, raw_data
|
| 141 |
+
),
|
| 142 |
+
|
| 143 |
+
'skill_recommendations': self._recommend_skills(
|
| 144 |
+
detected_domain, raw_data
|
| 145 |
+
),
|
| 146 |
+
|
| 147 |
+
'explanations': score_packet.get('explanations', {})
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
# Add fidelity if available
|
| 151 |
+
if domain_analysis and 'fidelity' in domain_analysis:
|
| 152 |
+
output['fidelity_assessment'] = domain_analysis['fidelity']
|
| 153 |
+
|
| 154 |
+
return output
|
| 155 |
+
|
| 156 |
+
def _get_grade(self, score: float) -> tuple:
|
| 157 |
+
"""Get grade and description for score"""
|
| 158 |
+
for threshold, grade, desc in self.GRADE_THRESHOLDS:
|
| 159 |
+
if score >= threshold:
|
| 160 |
+
return (grade, desc)
|
| 161 |
+
return ('F', 'Needs Improvement')
|
| 162 |
+
|
| 163 |
+
def _format_domain_analysis(self, detected_domain: str,
|
| 164 |
+
domain_analysis: Dict,
|
| 165 |
+
raw_data: Dict) -> Dict[str, Any]:
|
| 166 |
+
"""Format domain-specific analysis"""
|
| 167 |
+
result = {
|
| 168 |
+
'detected_domain': detected_domain,
|
| 169 |
+
'display_name': detected_domain.replace('_', ' ').title()
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
if domain_analysis:
|
| 173 |
+
result['domain_confidence'] = domain_analysis.get('domain_confidence', 0)
|
| 174 |
+
result['aspects'] = domain_analysis.get('aspects', {})
|
| 175 |
+
|
| 176 |
+
# Skill gaps from raw data
|
| 177 |
+
if raw_data and 'skills' in raw_data:
|
| 178 |
+
skills = raw_data.get('skills', [])
|
| 179 |
+
if isinstance(skills, str):
|
| 180 |
+
skills = [s.strip().lower() for s in skills.split(',')]
|
| 181 |
+
result['current_skills'] = skills
|
| 182 |
+
|
| 183 |
+
return result
|
| 184 |
+
|
| 185 |
+
def _identify_strengths(self, features: Dict) -> List[Dict]:
|
| 186 |
+
"""Identify top strengths from features"""
|
| 187 |
+
strengths = []
|
| 188 |
+
|
| 189 |
+
# Universal features
|
| 190 |
+
universal = features.get('universal', {})
|
| 191 |
+
if universal.get('cgpa_norm', 0) > 0.8:
|
| 192 |
+
strengths.append({
|
| 193 |
+
'area': 'Academic Excellence',
|
| 194 |
+
'score': universal['cgpa_norm'],
|
| 195 |
+
'description': 'Strong academic performance with high CGPA'
|
| 196 |
+
})
|
| 197 |
+
|
| 198 |
+
if universal.get('internship_exposure', 0) > 0.7:
|
| 199 |
+
strengths.append({
|
| 200 |
+
'area': 'Industry Experience',
|
| 201 |
+
'score': universal['internship_exposure'],
|
| 202 |
+
'description': 'Significant practical experience through internships'
|
| 203 |
+
})
|
| 204 |
+
|
| 205 |
+
# Personality traits
|
| 206 |
+
personality = features.get('personality', {})
|
| 207 |
+
for trait, score in personality.items():
|
| 208 |
+
if score > 0.75:
|
| 209 |
+
strengths.append({
|
| 210 |
+
'area': trait.title(),
|
| 211 |
+
'score': score,
|
| 212 |
+
'description': self._get_trait_description(trait, 'high')
|
| 213 |
+
})
|
| 214 |
+
|
| 215 |
+
# Text aspects
|
| 216 |
+
text = features.get('text', {})
|
| 217 |
+
if text.get('leadership_score', 0) > 0.7:
|
| 218 |
+
strengths.append({
|
| 219 |
+
'area': 'Leadership',
|
| 220 |
+
'score': text['leadership_score'],
|
| 221 |
+
'description': 'Demonstrated leadership abilities with concrete examples'
|
| 222 |
+
})
|
| 223 |
+
|
| 224 |
+
if text.get('technical_skills', 0) > 0.7:
|
| 225 |
+
strengths.append({
|
| 226 |
+
'area': 'Technical Skills',
|
| 227 |
+
'score': text['technical_skills'],
|
| 228 |
+
'description': 'Strong technical competencies'
|
| 229 |
+
})
|
| 230 |
+
|
| 231 |
+
# Sort by score and return top 5
|
| 232 |
+
strengths.sort(key=lambda x: x['score'], reverse=True)
|
| 233 |
+
return strengths[:5]
|
| 234 |
+
|
| 235 |
+
def _identify_improvements(self, features: Dict) -> List[Dict]:
|
| 236 |
+
"""Identify areas needing improvement"""
|
| 237 |
+
improvements = []
|
| 238 |
+
|
| 239 |
+
# Universal features
|
| 240 |
+
universal = features.get('universal', {})
|
| 241 |
+
if universal.get('ec_quality', 0) < 0.4:
|
| 242 |
+
improvements.append({
|
| 243 |
+
'area': 'Extracurricular Activities',
|
| 244 |
+
'current_score': universal.get('ec_quality', 0),
|
| 245 |
+
'suggestion': 'Join clubs, participate in competitions, or take leadership roles'
|
| 246 |
+
})
|
| 247 |
+
|
| 248 |
+
if universal.get('cert_quality', 0) < 0.4:
|
| 249 |
+
improvements.append({
|
| 250 |
+
'area': 'Professional Certifications',
|
| 251 |
+
'current_score': universal.get('cert_quality', 0),
|
| 252 |
+
'suggestion': 'Pursue industry-recognized certifications in your domain'
|
| 253 |
+
})
|
| 254 |
+
|
| 255 |
+
# Text aspects
|
| 256 |
+
text = features.get('text', {})
|
| 257 |
+
if text.get('communication', 0) < 0.5:
|
| 258 |
+
improvements.append({
|
| 259 |
+
'area': 'Communication Skills',
|
| 260 |
+
'current_score': text.get('communication', 0),
|
| 261 |
+
'suggestion': 'Practice public speaking, write detailed project documentation'
|
| 262 |
+
})
|
| 263 |
+
|
| 264 |
+
if text.get('career_alignment', 0) < 0.5:
|
| 265 |
+
improvements.append({
|
| 266 |
+
'area': 'Career Clarity',
|
| 267 |
+
'current_score': text.get('career_alignment', 0),
|
| 268 |
+
'suggestion': 'Define clear short-term and long-term career goals'
|
| 269 |
+
})
|
| 270 |
+
|
| 271 |
+
# Sort by score (lowest first)
|
| 272 |
+
improvements.sort(key=lambda x: x['current_score'])
|
| 273 |
+
return improvements[:4]
|
| 274 |
+
|
| 275 |
+
def _suggest_careers(self, domain: str, features: Dict,
|
| 276 |
+
raw_data: Dict) -> List[Dict]:
|
| 277 |
+
"""Suggest career paths based on profile"""
|
| 278 |
+
suggestions = []
|
| 279 |
+
|
| 280 |
+
# Get student skills
|
| 281 |
+
skills = []
|
| 282 |
+
if raw_data and 'skills' in raw_data:
|
| 283 |
+
skills_raw = raw_data.get('skills', [])
|
| 284 |
+
if isinstance(skills_raw, str):
|
| 285 |
+
skills = [s.strip().lower() for s in skills_raw.split(',')]
|
| 286 |
+
else:
|
| 287 |
+
skills = [s.lower() for s in skills_raw]
|
| 288 |
+
|
| 289 |
+
# Text features for soft skills
|
| 290 |
+
text = features.get('text', {})
|
| 291 |
+
|
| 292 |
+
for role, reqs in self.career_requirements.items():
|
| 293 |
+
# Calculate fit score
|
| 294 |
+
required_met = sum(1 for r in reqs['required']
|
| 295 |
+
if r in skills or self._has_soft_skill(r, text))
|
| 296 |
+
preferred_met = sum(1 for p in reqs['preferred']
|
| 297 |
+
if p in skills or self._has_soft_skill(p, text))
|
| 298 |
+
|
| 299 |
+
total_reqs = len(reqs['required'])
|
| 300 |
+
fit_score = (required_met / total_reqs) if total_reqs else 0
|
| 301 |
+
fit_score += (preferred_met / len(reqs['preferred'])) * 0.3 if reqs['preferred'] else 0
|
| 302 |
+
fit_score = min(fit_score, 1.0)
|
| 303 |
+
|
| 304 |
+
if fit_score > 0.3: # Minimum threshold
|
| 305 |
+
suggestions.append({
|
| 306 |
+
'role': role.replace('_', ' ').title(),
|
| 307 |
+
'fit_score': round(fit_score, 2),
|
| 308 |
+
'requirements_met': [r for r in reqs['required']
|
| 309 |
+
if r in skills or self._has_soft_skill(r, text)],
|
| 310 |
+
'requirements_gap': [r for r in reqs['required']
|
| 311 |
+
if r not in skills and not self._has_soft_skill(r, text)]
|
| 312 |
+
})
|
| 313 |
+
|
| 314 |
+
# Sort by fit score
|
| 315 |
+
suggestions.sort(key=lambda x: x['fit_score'], reverse=True)
|
| 316 |
+
return suggestions[:3]
|
| 317 |
+
|
| 318 |
+
def _has_soft_skill(self, skill: str, text_features: Dict) -> bool:
|
| 319 |
+
"""Check if student has a soft skill based on text analysis"""
|
| 320 |
+
skill_mapping = {
|
| 321 |
+
'communication': 'communication',
|
| 322 |
+
'leadership': 'leadership_score',
|
| 323 |
+
'problem_solving': 'problem_solving',
|
| 324 |
+
'teamwork': 'teamwork'
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
if skill in skill_mapping:
|
| 328 |
+
return text_features.get(skill_mapping[skill], 0) > 0.6
|
| 329 |
+
return False
|
| 330 |
+
|
| 331 |
+
def _recommend_skills(self, domain: str, raw_data: Dict) -> List[Dict]:
|
| 332 |
+
"""Recommend skills to acquire"""
|
| 333 |
+
recommendations = []
|
| 334 |
+
|
| 335 |
+
# Domain-specific recommendations
|
| 336 |
+
domain_skills = {
|
| 337 |
+
'software_engineering': [
|
| 338 |
+
{'skill': 'cloud', 'courses': ['AWS Solutions Architect', 'GCP Fundamentals']},
|
| 339 |
+
{'skill': 'system_design', 'courses': ['Grokking System Design']},
|
| 340 |
+
{'skill': 'devops', 'courses': ['Docker Mastery', 'Kubernetes']}
|
| 341 |
+
],
|
| 342 |
+
'data_science': [
|
| 343 |
+
{'skill': 'deep_learning', 'courses': ['Deep Learning Specialization']},
|
| 344 |
+
{'skill': 'mlops', 'courses': ['MLOps for Production']},
|
| 345 |
+
{'skill': 'statistics', 'courses': ['Statistics with Python']}
|
| 346 |
+
],
|
| 347 |
+
'mechanical_engineering': [
|
| 348 |
+
{'skill': 'ev_powertrain', 'courses': ['Electric Vehicle Technology']},
|
| 349 |
+
{'skill': 'automation', 'courses': ['Industrial Automation', 'PLC']}
|
| 350 |
+
]
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
# Get current skills
|
| 354 |
+
current_skills = []
|
| 355 |
+
if raw_data and 'skills' in raw_data:
|
| 356 |
+
skills_raw = raw_data.get('skills', [])
|
| 357 |
+
if isinstance(skills_raw, str):
|
| 358 |
+
current_skills = [s.strip().lower() for s in skills_raw.split(',')]
|
| 359 |
+
|
| 360 |
+
# Recommend missing skills
|
| 361 |
+
domain_recs = domain_skills.get(domain, domain_skills.get('software_engineering', []))
|
| 362 |
+
|
| 363 |
+
for rec in domain_recs:
|
| 364 |
+
if rec['skill'] not in current_skills:
|
| 365 |
+
recommendations.append({
|
| 366 |
+
'skill': rec['skill'].replace('_', ' ').title(),
|
| 367 |
+
'priority': 'high',
|
| 368 |
+
'recommended_courses': rec['courses'],
|
| 369 |
+
'certifications': []
|
| 370 |
+
})
|
| 371 |
+
|
| 372 |
+
return recommendations[:4]
|
| 373 |
+
|
| 374 |
+
def _get_trait_description(self, trait: str, level: str) -> str:
|
| 375 |
+
"""Get description for personality trait"""
|
| 376 |
+
descriptions = {
|
| 377 |
+
'openness': {
|
| 378 |
+
'high': 'Creative, curious, and open to new experiences',
|
| 379 |
+
'low': 'Practical and focused on concrete tasks'
|
| 380 |
+
},
|
| 381 |
+
'conscientiousness': {
|
| 382 |
+
'high': 'Organized, disciplined, and reliable',
|
| 383 |
+
'low': 'Flexible and adaptable to changing situations'
|
| 384 |
+
},
|
| 385 |
+
'extraversion': {
|
| 386 |
+
'high': 'Energetic, sociable, and thrives in team settings',
|
| 387 |
+
'low': 'Focused, reflective, and excels in independent work'
|
| 388 |
+
},
|
| 389 |
+
'agreeableness': {
|
| 390 |
+
'high': 'Cooperative, empathetic, and team-oriented',
|
| 391 |
+
'low': 'Independent thinker, comfortable with competition'
|
| 392 |
+
},
|
| 393 |
+
'stability': {
|
| 394 |
+
'high': 'Emotionally resilient and handles stress well',
|
| 395 |
+
'low': 'Sensitive and responsive to feedback'
|
| 396 |
+
}
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
return descriptions.get(trait, {}).get(level, f"Strong {trait}")
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
# Module-level singleton instance, created lazily on first access.
_student_output_service: Optional[StudentOutputService] = None


def get_student_output_service() -> StudentOutputService:
    """Return the process-wide StudentOutputService, constructing it once.

    Not thread-safe: concurrent first calls could race to create two
    instances, which is harmless here since the service is stateless
    after construction.
    """
    global _student_output_service
    if _student_output_service is None:
        _student_output_service = StudentOutputService()
    return _student_output_service
|
services/text_module.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Text Embeddings Module - NLP-based Scoring"""
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
from typing import Dict, Tuple
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
class TextModule:
    """Scores the three free-text questionnaire responses.

    Combines SBERT embedding similarity against reference "ideal" answers
    with lexical heuristics (length, keyword counts) to produce an overall
    0-1 text score, a completeness-based confidence, and per-feature values.
    """

    def __init__(self):
        # Load SBERT model (downloads on first use).
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

        # Reference embeddings for ideal responses. model.encode(list)
        # returns a 2-D array: one embedding row per example sentence.
        self.reference_embeddings = {
            'strengths': self.model.encode([
                "I have strong technical skills in programming, problem-solving, and software development",
                "My strengths include leadership, communication, and analytical thinking",
                "I excel at teamwork, project management, and innovative solutions"
            ]),
            'career': self.model.encode([
                "I am interested in software engineering and technology innovation",
                "I want to work in data science and machine learning",
                "My goal is to become a product manager and lead technical teams"
            ])
        }

        # Leadership keywords matched by substring in _assess_leadership.
        self.leadership_keywords = [
            'lead', 'leader', 'leadership', 'managed', 'organized', 'president',
            'head', 'coordinator', 'captain', 'founded', 'initiated', 'directed'
        ]

    def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
        """
        Calculate the text score from the 3 textual responses.

        Args:
            text_responses: mapping with keys 'text_q1' (strengths),
                'text_q2' (career interests), 'text_q3' (extracurriculars);
                missing keys are treated as empty responses.

        Returns: (score, confidence, features) — score is a 0-1 weighted
        blend of the four features; confidence reflects completeness.
        """
        features = {}

        text_q1 = text_responses.get('text_q1', '')
        text_q2 = text_responses.get('text_q2', '')
        text_q3 = text_responses.get('text_q3', '')

        # Feature 1: Writing quality (text_q1 - strengths)
        features['writing_quality'] = self._assess_writing_quality(text_q1)

        # Feature 2: Intent coherence (text_q2 - career interests)
        features['intent_coherence'] = self._assess_intent_coherence(text_q2)

        # Feature 3: Leadership flag (text_q3 - extracurriculars)
        features['leadership_score'] = self._assess_leadership(text_q3)

        # Feature 4: Content depth (all responses)
        features['content_depth'] = self._assess_content_depth(text_q1, text_q2, text_q3)

        # Calculate overall text score (weights sum to 1.0).
        text_score = (
            features['writing_quality'] * 0.25 +
            features['intent_coherence'] * 0.25 +
            features['leadership_score'] * 0.30 +
            features['content_depth'] * 0.20
        )

        # Calculate confidence based on response completeness
        confidence = self._calculate_confidence(text_q1, text_q2, text_q3)

        return text_score, confidence, features

    def _assess_writing_quality(self, text: str) -> float:
        """Heuristic 0-1 writing-quality score (length, structure, variety)."""
        if not text or len(text) < 50:
            return 0.2

        score = 0.5  # Base score

        # Length check (150-300 words ideal)
        word_count = len(text.split())
        if 150 <= word_count <= 300:
            score += 0.3
        elif 100 <= word_count < 150 or 300 < word_count <= 400:
            score += 0.2
        else:
            score += 0.1

        # Sentence structure (multiple sentences)
        sentences = re.split(r'[.!?]+', text)
        if len(sentences) >= 5:
            score += 0.1

        # Proper capitalization (safe: text is non-empty past the guard above)
        if text[0].isupper():
            score += 0.05

        # No excessive repetition
        words = text.lower().split()
        unique_ratio = len(set(words)) / len(words) if words else 0
        if unique_ratio > 0.6:
            score += 0.05

        return min(score, 1.0)

    def _assess_intent_coherence(self, text: str) -> float:
        """Score how closely the response matches reference career statements.

        Uses the max cosine similarity against the 'career' reference
        embeddings, rescaled from [-1, 1] to [0, 1].
        """
        if not text or len(text) < 50:
            return 0.2

        # Encode the response
        response_embedding = self.model.encode([text])[0]

        # Cosine similarity with each reference career embedding
        similarities = []
        for ref_emb in self.reference_embeddings['career']:
            similarity = np.dot(response_embedding, ref_emb) / (
                np.linalg.norm(response_embedding) * np.linalg.norm(ref_emb)
            )
            similarities.append(similarity)

        # Take max similarity
        max_similarity = max(similarities) if similarities else 0

        # Normalize to 0-1 (cosine similarity is -1 to 1)
        score = (max_similarity + 1) / 2

        return score

    def _assess_leadership(self, text: str) -> float:
        """Keyword-based 0-1 leadership score for the extracurriculars answer."""
        if not text or len(text) < 50:
            return 0.2

        text_lower = text.lower()

        # Count leadership keywords (substring match, so e.g. 'leadership'
        # also satisfies 'lead'; each keyword contributes at most once)
        keyword_count = sum(1 for keyword in self.leadership_keywords if keyword in text_lower)

        # Base score on keyword presence
        if keyword_count >= 3:
            score = 1.0
        elif keyword_count == 2:
            score = 0.8
        elif keyword_count == 1:
            score = 0.6
        else:
            score = 0.3

        # Bonus for specific leadership phrases
        if 'led a team' in text_lower or 'team lead' in text_lower:
            score = min(score + 0.1, 1.0)

        return score

    def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Map combined word count across all answers to a 0.2-1.0 depth score."""
        total_words = len(text_q1.split()) + len(text_q2.split()) + len(text_q3.split())

        if total_words >= 450:  # 150+ words each
            return 1.0
        elif total_words >= 300:
            return 0.8
        elif total_words >= 200:
            return 0.6
        elif total_words >= 100:
            return 0.4
        else:
            return 0.2

    def _calculate_confidence(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Average per-response completeness (0, 0.3, 0.6 or 1.0 by length)."""
        scores = []

        for text in [text_q1, text_q2, text_q3]:
            if not text:
                scores.append(0)
            elif len(text) < 50:
                scores.append(0.3)
            elif len(text) < 100:
                scores.append(0.6)
            else:
                scores.append(1.0)

        return np.mean(scores)

    def explain(self, features: Dict) -> Dict:
        """Turn feature scores into human-readable highlights and suggestions."""
        explanations = {
            'highlights': [],
            'suggestions': []
        }

        # Highlights
        if features.get('writing_quality', 0) > 0.7:
            explanations['highlights'].append("Strong writing quality with clear communication")

        if features.get('leadership_score', 0) > 0.7:
            explanations['highlights'].append("Demonstrated leadership experience and initiative")

        if features.get('intent_coherence', 0) > 0.7:
            explanations['highlights'].append("Clear and coherent career goals")

        # Suggestions
        if features.get('writing_quality', 0) < 0.5:
            explanations['suggestions'].append("Provide more detailed responses (aim for 150-300 words each)")

        if features.get('leadership_score', 0) < 0.5:
            explanations['suggestions'].append("Highlight specific leadership roles and their impact")

        if features.get('content_depth', 0) < 0.5:
            explanations['suggestions'].append("Include more specific examples and achievements")

        return explanations
|
services/text_module_v2.py
ADDED
|
@@ -0,0 +1,576 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Text Embeddings Module V2 - Aspect-based Prototype Extraction"""
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import numpy as np
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Dict, Tuple, List, Optional
|
| 8 |
+
from sentence_transformers import SentenceTransformer
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
# Default aspect seeds (built-in fallback)
|
| 13 |
+
# Maps aspect name -> list of seed phrases; TextModuleV2 embeds these and
# averages them into one prototype centroid per aspect. Overridable at
# runtime via the JSON file at seeds_path.
DEFAULT_ASPECT_SEEDS = {
    "leadership": [
        "led a team", "was team lead", "managed a project", "supervised interns",
        "coordinated a cross-functional team", "organized the club", "president of the society",
        "captain of the team", "ran weekly standups", "delegated tasks", "mentored junior members",
        "headed the project", "oversaw project timelines", "chaired the committee",
        "led end-to-end delivery", "directed project milestones", "led a 5-person team",
        "managed stakeholders", "took ownership of the initiative", "led code reviews",
        "organized campus events", "led product demo sessions", "led recruitment for volunteers",
        "managed vendor relationships", "spearheaded the outreach program"
    ],
    "technical_skills": [
        "developed a web API", "implemented RESTful services", "coded in python",
        "built machine learning models", "trained neural networks", "implemented data pipelines",
        "used pandas for ETL", "designed database schemas", "built microservices",
        "deployed models using docker", "worked with FastAPI", "implemented CI/CD",
        "wrote unit tests", "optimized SQL queries", "used scikit-learn",
        "developed recommendation systems", "built feature engineering pipelines",
        "deployed to cloud", "developed ETL jobs", "worked with Kafka",
        "implemented caching layers", "used TensorFlow or PyTorch", "built backend services",
        "wrote production-grade code", "integrated third-party APIs"
    ],
    "problem_solving": [
        "solved complex problem", "debugged production issues", "optimized an algorithm",
        "reduced latency of service", "designed a scalable solution", "investigated root cause",
        "improved system reliability", "created a novel solution", "troubleshot integration issues",
        "automated manual tasks", "reduced memory usage", "resolved data pipeline failures",
        "refactored critical code", "handled edge cases", "iterated on prototypes",
        "performed A/B testing to decide", "diagnosed performance bottlenecks",
        "designed fallback strategies", "resolved deployment failures", "created monitoring & alerts"
    ],
    "internships_experience": [
        "summer internship", "industrial training", "interned at", "worked as an intern",
        "internship project", "internship in data science", "interned at a startup",
        "completed internship at", "interned with the engineering team", "intern experience",
        "interned at an e-commerce company", "industrial internship", "co-op placement",
        "paid internship", "research internship", "interned as a software engineer",
        "on-the-job training", "worked under mentor", "internship-driven project",
        "corporate internship"
    ],
    "communication": [
        "presented to stakeholders", "gave a presentation", "wrote documentation",
        "authored reports", "explained results to non-technical", "public speaking",
        "delivered demo", "prepared slides", "wrote user guides", "communicated with clients",
        "collaborated across teams", "conducted knowledge transfer", "wrote clear emails",
        "explained technical concepts", "presented project outcomes", "led demo sessions",
        "created onboarding docs", "contributed to team discussions", "led workshops",
        "hosted training sessions"
    ],
    "teamwork": [
        "collaborated with team", "worked in a cross-functional team", "paired programming",
        "contributed to group project", "supported teammates", "collaborated on design",
        "worked with designers and PMs", "helped teammates debug", "co-authored project",
        "mentored peers", "shared responsibilities", "worked effectively in group",
        "contributed in agile team", "participated in sprints", "assisted in integration"
    ],
    "project_execution": [
        "delivered project on time", "met project deadlines", "managed milestones",
        "handled project planning", "released production features", "coordinated deployment",
        "delivered MVP", "tracked KPIs", "managed scope", "created project timeline",
        "ran retrospectives", "managed feature rollout", "ensured on-time delivery",
        "performed release validations", "deployed analytics dashboard", "iterated based on feedback"
    ],
    "initiative": [
        "initiated a project", "proposed a new idea", "took initiative", "started a side project",
        "built a proof of concept", "started a campus chapter", "created an automation",
        "improved an existing process", "volunteered to lead", "identified improvement areas",
        "launched a mini-product", "ran a pilot program", "created onboarding scripts",
        "led process improvements", "started a mentoring circle"
    ],
    "learning_agility": [
        "quick learner", "self-taught", "learned new framework", "picked up new language",
        "adapted to new tech", "completed online courses", "upskilled via projects",
        "transitioned domains", "learned on the job", "rapidly onboarded", "attended workshops",
        "completed bootcamp", "took certification courses", "learned through documentation",
        "scaled knowledge quickly", "adapted to changing scope"
    ],
    "career_alignment": [
        "career goal is", "aspire to become", "interested in data science",
        "pursue a role in product", "long-term goal", "want to specialize in",
        "career objective", "planning to pursue masters", "aim to work in industry",
        "seek product management roles", "interested in research", "want to join a startup",
        "targeting roles in ML engineering", "aiming for consulting roles",
        "career path is focused on"
    ]
}

# Question to aspects mapping: which aspect centroids each free-text answer
# is scored against. Keys mirror the text_q1..q3 response fields; every
# aspect listed here must exist in the seeds dict above.
QUESTION_ASPECT_MAP = {
    "text_q1": ["technical_skills", "problem_solving", "learning_agility", "initiative", "communication"],
    "text_q2": ["career_alignment", "learning_agility", "initiative", "communication"],
    "text_q3": ["leadership", "teamwork", "project_execution", "internships_experience", "communication"]
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class TextModuleV2:
|
| 109 |
+
"""Enhanced text scoring using aspect-based prototypes with all-mpnet-base-v2"""
|
| 110 |
+
|
| 111 |
+
    def __init__(self, model_name: Optional[str] = None, seeds_path: str = "./aspect_seeds.json",
                 centroids_path: str = "./aspect_centroids.npz"):
        """Initialise the aspect scorer: load model, seeds and centroids.

        Args:
            model_name: sentence-transformers model id; falls back to the
                ASPECT_MODEL_NAME env var, then 'all-mpnet-base-v2'.
            seeds_path: JSON file of aspect seed phrases (optional on disk).
            centroids_path: .npz cache of precomputed aspect centroids.
        """
        # Config: allow model override via env or param
        self.model_name = model_name or os.getenv('ASPECT_MODEL_NAME', 'all-mpnet-base-v2')
        self.seeds_path = seeds_path
        self.centroids_path = centroids_path

        # Load model (CPU-pinned; may download weights on first use)
        logger.info(f"Loading sentence transformer model: {self.model_name}")
        self.model = SentenceTransformer(self.model_name, device='cpu')

        # Load seeds (file if present, built-in defaults otherwise)
        self.aspect_seeds = self._load_seeds()

        # Load or build centroids (cached to centroids_path)
        self.centroids = self._load_or_build_centroids()

        logger.info(f"TextModuleV2 initialized with {len(self.aspect_seeds)} aspects")
|
| 129 |
+
|
| 130 |
+
def _load_seeds(self) -> Dict[str, List[str]]:
|
| 131 |
+
"""Load aspect seeds from JSON or use defaults"""
|
| 132 |
+
if os.path.exists(self.seeds_path):
|
| 133 |
+
try:
|
| 134 |
+
with open(self.seeds_path, 'r', encoding='utf-8') as f:
|
| 135 |
+
seeds = json.load(f)
|
| 136 |
+
logger.info(f"Loaded aspect seeds from {self.seeds_path}")
|
| 137 |
+
return seeds
|
| 138 |
+
except Exception as e:
|
| 139 |
+
logger.warning(f"Failed to load seeds from {self.seeds_path}: {e}. Using defaults.")
|
| 140 |
+
return DEFAULT_ASPECT_SEEDS.copy()
|
| 141 |
+
|
| 142 |
+
def _load_or_build_centroids(self) -> Dict[str, np.ndarray]:
|
| 143 |
+
"""Load cached centroids or build from seeds"""
|
| 144 |
+
if os.path.exists(self.centroids_path):
|
| 145 |
+
try:
|
| 146 |
+
data = np.load(self.centroids_path)
|
| 147 |
+
centroids = {key: data[key] for key in data.files}
|
| 148 |
+
logger.info(f"Loaded centroids from {self.centroids_path}")
|
| 149 |
+
return centroids
|
| 150 |
+
except Exception as e:
|
| 151 |
+
logger.warning(f"Failed to load centroids: {e}. Rebuilding.")
|
| 152 |
+
|
| 153 |
+
return self.build_prototypes(self.aspect_seeds, self.model)
|
| 154 |
+
|
| 155 |
+
def build_prototypes(self, aspect_seeds: Dict[str, List[str]],
                     model: SentenceTransformer) -> Dict[str, np.ndarray]:
    """Encode each aspect's seed phrases into a unit-norm centroid.

    Aspects with no seeds are skipped with a warning. The resulting
    mapping is persisted to self.centroids_path as an .npz archive
    (failure to save is logged but not fatal).
    """
    logger.info("Building aspect centroids...")
    prototypes = {}

    for name, phrases in aspect_seeds.items():
        if not phrases:
            logger.warning(f"Aspect '{name}' has no seeds, skipping")
            continue

        # CPU encoding of all seed phrases for this aspect.
        vectors = np.array(
            model.encode(phrases, convert_to_tensor=False, show_progress_bar=False),
            dtype=np.float32,
        )

        # Average the seed embeddings, then normalize to unit length.
        mean_vec = vectors.mean(axis=0)
        prototypes[name] = mean_vec / np.linalg.norm(mean_vec)

    # Cache to disk for fast startup on subsequent runs.
    try:
        np.savez(self.centroids_path, **prototypes)
        logger.info(f"Saved centroids to {self.centroids_path}")
    except Exception as e:
        logger.error(f"Failed to save centroids: {e}")

    return prototypes
|
| 183 |
+
|
| 184 |
+
def score_text_aspects(self, text: str, centroids: Dict[str, np.ndarray],
                       top_k: int = 3) -> Tuple[Dict[str, float], Dict[str, List[str]], float]:
    """
    Score text against aspect centroids.

    The text is split into chunks, each chunk is embedded, and every
    aspect is scored as 0.6 * best chunk similarity + 0.4 * mean of the
    top-k chunk similarities, mapped from [-1, 1] onto [0, 1].

    Returns: (aspect_scores, chunk_assignments, confidence) where
    chunk_assignments maps each aspect to the chunks whose cosine
    similarity exceeded 0.3.
    """
    # Too little text to embed meaningfully.
    if not text or len(text) < 20:
        return {}, {}, 0.0

    # Split into chunks (sentences or 50-word windows)
    chunks = self._split_text(text)
    if not chunks:
        return {}, {}, 0.0

    # Encode chunks on CPU as a float32 matrix (one row per chunk).
    chunk_embeddings = self.model.encode(chunks, convert_to_tensor=False, show_progress_bar=False)
    chunk_embeddings = np.array(chunk_embeddings, dtype=np.float32)

    # Score each aspect
    aspect_scores = {}
    chunk_assignments = {aspect: [] for aspect in centroids.keys()}

    for aspect, centroid in centroids.items():
        # Cosine similarity of every chunk to this centroid
        # (1e-8 epsilon avoids division by zero for degenerate vectors).
        sims = np.dot(chunk_embeddings, centroid) / (
            np.linalg.norm(chunk_embeddings, axis=1) * np.linalg.norm(centroid) + 1e-8
        )

        # Scoring formula: 0.6 * max_sim + 0.4 * mean_topk
        max_sim = np.max(sims)
        # np.partition puts the k largest sims in the tail without a full sort;
        # min() guards the case of fewer chunks than top_k.
        topk_sims = np.partition(sims, -min(top_k, len(sims)))[-top_k:]
        mean_topk = np.mean(topk_sims)

        # Map from [-1,1] to [0,1]
        raw_score = 0.6 * max_sim + 0.4 * mean_topk
        normalized_score = (raw_score + 1) / 2
        aspect_scores[aspect] = float(np.clip(normalized_score, 0, 1))

        # Record which chunks matched this aspect (sim > threshold).
        threshold = 0.3
        for i, sim in enumerate(sims):
            if sim > threshold:
                chunk_assignments[aspect].append(chunks[i])

    # Confidence combines text length and the score distribution.
    confidence = self._calculate_aspect_confidence(text, aspect_scores)

    return aspect_scores, chunk_assignments, confidence
|
| 232 |
+
|
| 233 |
+
def _split_text(self, text: str) -> List[str]:
|
| 234 |
+
"""Split text into scorable chunks"""
|
| 235 |
+
import re
|
| 236 |
+
# Split by sentences
|
| 237 |
+
sentences = re.split(r'[.!?]+', text)
|
| 238 |
+
chunks = [s.strip() for s in sentences if len(s.strip()) > 20]
|
| 239 |
+
|
| 240 |
+
# If too few sentences, use sliding window
|
| 241 |
+
if len(chunks) < 3:
|
| 242 |
+
words = text.split()
|
| 243 |
+
window_size = 50
|
| 244 |
+
step = 25
|
| 245 |
+
chunks = []
|
| 246 |
+
for i in range(0, max(1, len(words) - window_size + 1), step):
|
| 247 |
+
chunk = ' '.join(words[i:i+window_size])
|
| 248 |
+
if len(chunk) > 20:
|
| 249 |
+
chunks.append(chunk)
|
| 250 |
+
|
| 251 |
+
return chunks[:20] # Limit to 20 chunks
|
| 252 |
+
|
| 253 |
+
def _calculate_aspect_confidence(self, text: str, aspect_scores: Dict[str, float]) -> float:
|
| 254 |
+
"""Calculate confidence based on text quality and score distribution"""
|
| 255 |
+
if not aspect_scores:
|
| 256 |
+
return 0.0
|
| 257 |
+
|
| 258 |
+
# Text length factor
|
| 259 |
+
word_count = len(text.split())
|
| 260 |
+
length_factor = min(word_count / 150, 1.0)
|
| 261 |
+
|
| 262 |
+
# Score variance factor (higher variance = more confident signal)
|
| 263 |
+
scores = list(aspect_scores.values())
|
| 264 |
+
score_std = np.std(scores)
|
| 265 |
+
variance_factor = min(score_std * 2, 1.0)
|
| 266 |
+
|
| 267 |
+
# Max score factor
|
| 268 |
+
max_score = max(scores)
|
| 269 |
+
|
| 270 |
+
confidence = 0.4 * length_factor + 0.3 * variance_factor + 0.3 * max_score
|
| 271 |
+
return float(np.clip(confidence, 0, 1))
|
| 272 |
+
|
| 273 |
+
def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
    """
    Main scoring function - backward compatible interface.

    Each of the three free-text answers is scored only against the
    aspects mapped to it in QUESTION_ASPECT_MAP; the per-aspect scores
    are then combined into named features and a weighted overall score.

    Returns: (score, confidence, features)
    """
    text_q1 = text_responses.get('text_q1', '')
    text_q2 = text_responses.get('text_q2', '')
    text_q3 = text_responses.get('text_q3', '')

    # Score each question with relevant aspects only.
    q1_aspects = QUESTION_ASPECT_MAP['text_q1']
    q2_aspects = QUESTION_ASPECT_MAP['text_q2']
    q3_aspects = QUESTION_ASPECT_MAP['text_q3']

    # Restrict the centroid set per question (aspects missing from
    # self.centroids are silently skipped).
    q1_centroids = {k: self.centroids[k] for k in q1_aspects if k in self.centroids}
    q2_centroids = {k: self.centroids[k] for k in q2_aspects if k in self.centroids}
    q3_centroids = {k: self.centroids[k] for k in q3_aspects if k in self.centroids}

    q1_scores, _, q1_conf = self.score_text_aspects(text_q1, q1_centroids)
    q2_scores, _, q2_conf = self.score_text_aspects(text_q2, q2_centroids)
    q3_scores, _, q3_conf = self.score_text_aspects(text_q3, q3_centroids)

    # Aggregate features (0.3 is the neutral default for absent aspects).
    features = {}

    # Technical skills from Q1
    features['technical_skills'] = q1_scores.get('technical_skills', 0.3)
    features['problem_solving'] = q1_scores.get('problem_solving', 0.3)

    # Career alignment from Q2
    features['career_alignment'] = q2_scores.get('career_alignment', 0.3)
    # Learning agility can surface in Q1 or Q2 — take the stronger signal.
    features['learning_agility'] = max(
        q1_scores.get('learning_agility', 0.3),
        q2_scores.get('learning_agility', 0.3)
    )

    # Leadership from Q3
    features['leadership_score'] = q3_scores.get('leadership', 0.3)
    features['teamwork'] = q3_scores.get('teamwork', 0.3)
    features['internships_experience'] = q3_scores.get('internships_experience', 0.3)

    # Communication (averaged across all three answers)
    comm_scores = [
        q1_scores.get('communication', 0.3),
        q2_scores.get('communication', 0.3),
        q3_scores.get('communication', 0.3)
    ]
    features['communication'] = np.mean(comm_scores)

    # Writing quality (heuristic, assessed on Q1 only)
    features['writing_quality'] = self._assess_writing_quality(text_q1)

    # Content depth across all three answers
    features['content_depth'] = self._assess_content_depth(text_q1, text_q2, text_q3)

    # Calculate overall score (weighted combination; weights sum to 1.0).
    # Note: internships_experience and writing_quality are reported as
    # features but deliberately not weighted into the overall score.
    text_score = (
        features['technical_skills'] * 0.15 +
        features['problem_solving'] * 0.10 +
        features['leadership_score'] * 0.20 +
        features['career_alignment'] * 0.10 +
        features['communication'] * 0.15 +
        features['teamwork'] * 0.10 +
        features['learning_agility'] * 0.10 +
        features['content_depth'] * 0.10
    )

    # Overall confidence is the mean of per-question confidences.
    confidence = np.mean([q1_conf, q2_conf, q3_conf])

    return text_score, confidence, features
|
| 344 |
+
|
| 345 |
+
def _assess_writing_quality(self, text: str) -> float:
|
| 346 |
+
"""Heuristic writing quality assessment"""
|
| 347 |
+
if not text or len(text) < 50:
|
| 348 |
+
return 0.2
|
| 349 |
+
|
| 350 |
+
score = 0.5
|
| 351 |
+
word_count = len(text.split())
|
| 352 |
+
|
| 353 |
+
if 150 <= word_count <= 300:
|
| 354 |
+
score += 0.3
|
| 355 |
+
elif 100 <= word_count < 150 or 300 < word_count <= 400:
|
| 356 |
+
score += 0.2
|
| 357 |
+
else:
|
| 358 |
+
score += 0.1
|
| 359 |
+
|
| 360 |
+
import re
|
| 361 |
+
sentences = re.split(r'[.!?]+', text)
|
| 362 |
+
if len(sentences) >= 5:
|
| 363 |
+
score += 0.1
|
| 364 |
+
|
| 365 |
+
if text[0].isupper():
|
| 366 |
+
score += 0.05
|
| 367 |
+
|
| 368 |
+
words = text.lower().split()
|
| 369 |
+
unique_ratio = len(set(words)) / len(words) if words else 0
|
| 370 |
+
if unique_ratio > 0.6:
|
| 371 |
+
score += 0.05
|
| 372 |
+
|
| 373 |
+
return min(score, 1.0)
|
| 374 |
+
|
| 375 |
+
def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
|
| 376 |
+
"""Assess content depth"""
|
| 377 |
+
total_words = len(text_q1.split()) + len(text_q2.split()) + len(text_q3.split())
|
| 378 |
+
|
| 379 |
+
if total_words >= 450:
|
| 380 |
+
return 1.0
|
| 381 |
+
elif total_words >= 300:
|
| 382 |
+
return 0.8
|
| 383 |
+
elif total_words >= 200:
|
| 384 |
+
return 0.6
|
| 385 |
+
elif total_words >= 100:
|
| 386 |
+
return 0.4
|
| 387 |
+
else:
|
| 388 |
+
return 0.2
|
| 389 |
+
|
| 390 |
+
def explain(self, features: Dict) -> Dict:
    """Turn feature scores into human-readable highlights and suggestions.

    Features above 0.7 become highlights; selected features below 0.5
    become improvement suggestions. Missing features count as 0.
    """
    highlight_rules = [
        ('technical_skills', "Strong technical skills demonstrated"),
        ('leadership_score', "Clear leadership experience"),
        ('career_alignment', "Well-defined career goals"),
        ('communication', "Excellent communication skills"),
    ]
    suggestion_rules = [
        ('writing_quality', "Provide more detailed responses (150-300 words each)"),
        ('leadership_score', "Highlight leadership roles with specific examples"),
        ('technical_skills', "Describe technical projects and skills in detail"),
    ]

    return {
        'highlights': [msg for key, msg in highlight_rules if features.get(key, 0) > 0.7],
        'suggestions': [msg for key, msg in suggestion_rules if features.get(key, 0) < 0.5],
    }
|
| 419 |
+
|
| 420 |
+
# Admin functions
|
| 421 |
+
def get_aspect_seeds(self) -> Dict[str, List[str]]:
    """Return a shallow copy of the currently loaded aspect seeds."""
    # dict() makes a shallow copy so callers cannot replace entries in place.
    return dict(self.aspect_seeds)
|
| 424 |
+
|
| 425 |
+
def update_aspect_seeds(self, new_seeds: Dict[str, List[str]],
                        persist: bool = True) -> Dict:
    """
    Replace the aspect seeds, rebuild centroids, and optionally persist.

    Args:
        new_seeds: mapping of aspect key -> non-empty list of seed phrases.
        persist: when True, write the new seeds back to self.seeds_path.

    Returns:
        Stats dict (num_aspects, avg_seed_count, timestamp).

    Raises:
        ValueError: if new_seeds fails structural validation.
    """
    # Validate structure fully before touching any state.
    if not isinstance(new_seeds, dict):
        raise ValueError("new_seeds must be a dict")

    for key, seeds in new_seeds.items():
        if not isinstance(key, str):
            raise ValueError(f"Aspect key must be string, got {type(key)}")
        if not isinstance(seeds, list) or not seeds:
            raise ValueError(f"Seeds for '{key}' must be non-empty list")
        if not all(isinstance(s, str) for s in seeds):
            raise ValueError(f"All seeds for '{key}' must be strings")

    # Update seeds
    self.aspect_seeds = new_seeds.copy()

    # Recompute centroids so scoring immediately reflects the new seeds.
    logger.info("Recomputing centroids after seed update")
    self.centroids = self.build_prototypes(self.aspect_seeds, self.model)

    # Persist (best-effort: failure is logged, not raised).
    if persist:
        try:
            with open(self.seeds_path, 'w', encoding='utf-8') as f:
                json.dump(new_seeds, f, indent=2, ensure_ascii=False)
            logger.info(f"Persisted new seeds to {self.seeds_path}")
        except Exception as e:
            logger.error(f"Failed to persist seeds: {e}")

    from datetime import timezone
    stats = {
        "num_aspects": len(new_seeds),
        # Cast to plain float: np.mean returns np.float64, which is not
        # JSON-serializable and broke the /admin/aspect-seeds response.
        "avg_seed_count": float(np.mean([len(seeds) for seeds in new_seeds.values()])),
        # Timezone-aware replacement for the deprecated datetime.utcnow(),
        # keeping the same trailing-'Z' ISO format.
        "timestamp": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
    }

    logger.info(f"Aspect seeds updated: {stats}")
    return stats
|
| 468 |
+
|
| 469 |
+
def suggest_seed_expansions(self, corpus_texts: List[str], aspect_key: str,
                            top_n: int = 20) -> List[str]:
    """
    Rank corpus n-grams by similarity to an aspect centroid.

    Lightweight seed-mining helper: harvests frequent 2-5 word phrases
    from up to 100 corpus texts and returns the top_n most similar to
    the aspect's centroid. Unknown aspects yield an empty list.
    """
    if aspect_key not in self.centroids:
        return []

    target = self.centroids[aspect_key]

    from collections import Counter
    import re

    # Harvest 2-5 word n-gram candidates from a capped slice of the corpus.
    harvested = []
    for document in corpus_texts[:100]:
        tokens = document.lower().split()
        for size in range(2, 6):
            for start in range(len(tokens) - size + 1):
                gram = ' '.join(tokens[start:start + size])
                # Skip very short grams and ones with long digit runs (ids, years).
                if len(gram) > 10 and not re.search(r'\d{3,}', gram):
                    harvested.append(gram)

    # Keep only the 200 most frequent candidates before encoding.
    shortlist = [gram for gram, _ in Counter(harvested).most_common(200)]
    if not shortlist:
        return []

    # Embed candidates and rank by cosine similarity to the centroid.
    matrix = np.array(
        self.model.encode(shortlist, convert_to_tensor=False, show_progress_bar=False),
        dtype=np.float32,
    )
    similarity = np.dot(matrix, target) / (
        np.linalg.norm(matrix, axis=1) * np.linalg.norm(target) + 1e-8
    )

    ranked = np.argsort(similarity)[-top_n:][::-1]
    return [shortlist[idx] for idx in ranked]
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
def get_relevant_aspects_for_question(question_id: str) -> List[str]:
    """Return the aspect keys relevant to a question id.

    Returns a copy so callers cannot mutate the shared list stored in
    QUESTION_ASPECT_MAP (the original returned the map's own list
    object). Unknown ids yield an empty list.
    """
    return list(QUESTION_ASPECT_MAP.get(question_id, []))
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
# Flask admin blueprint
|
| 523 |
+
def register_admin_seed_endpoint(app, text_module: TextModuleV2):
    """Register GET/POST /admin/aspect-seeds endpoints on a Flask app.

    Both routes require an X-Admin-Token header matching the
    ADMIN_SEED_TOKEN environment variable.
    """
    from flask import Blueprint, request, jsonify
    import hmac

    admin_bp = Blueprint('admin_aspects', __name__, url_prefix='/admin')

    def check_admin_token():
        """Return a 401 response tuple, or None when the token is valid."""
        token = request.headers.get('X-Admin-Token')
        expected = os.getenv('ADMIN_SEED_TOKEN', 'admin-secret-token')
        # compare_digest is constant-time, so the comparison does not leak
        # the token via response-timing differences (plain != did).
        if not token or not hmac.compare_digest(token, expected):
            return jsonify({'error': 'Unauthorized'}), 401
        return None

    @admin_bp.route('/aspect-seeds', methods=['GET'])
    def get_seeds():
        """Get current aspect seeds"""
        auth_err = check_admin_token()
        if auth_err:
            return auth_err

        seeds = text_module.get_aspect_seeds()
        return jsonify({
            'success': True,
            'seeds': seeds,
            'num_aspects': len(seeds)
        })

    @admin_bp.route('/aspect-seeds', methods=['POST'])
    def update_seeds():
        """Update aspect seeds"""
        auth_err = check_admin_token()
        if auth_err:
            return auth_err

        # get_json(silent=True) yields None (not an exception) for a
        # missing/invalid JSON body; request.json could error out here
        # instead of producing the clean 400 below.
        data = request.get_json(silent=True) or {}
        new_seeds = data.get('seeds')
        persist = data.get('persist', True)

        if not new_seeds:
            return jsonify({'error': 'Missing seeds field'}), 400

        try:
            stats = text_module.update_aspect_seeds(new_seeds, persist=persist)
            return jsonify({
                'success': True,
                'message': 'Aspect seeds updated successfully',
                'stats': stats
            })
        except Exception as e:
            logger.error(f"Failed to update seeds: {e}")
            return jsonify({'error': str(e)}), 400

    app.register_blueprint(admin_bp)
    logger.info("Registered admin aspect-seed endpoints at /admin/aspect-seeds")
|
services/universal_module.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Universal Module - Academic & Experience Scoring"""
|
| 2 |
+
import numpy as np
|
| 3 |
+
import re
|
| 4 |
+
from typing import Dict, Tuple
|
| 5 |
+
|
| 6 |
+
class UniversalModule:
    """Scores students on academic performance and experience.

    score() combines normalized academic metrics (CGPA, semester
    trend/consistency, 10th/12th consistency) with keyword-based
    quality estimates of the free-text internship, extracurricular and
    certification answers into a weighted 0-1 score, plus a
    data-completeness confidence.
    """

    def __init__(self):
        # Relative weight of each feature in the final score (sums to 1.0).
        self.feature_weights = {
            'cgpa_norm': 0.30,
            'sgpa_trend': 0.15,
            'sgpa_consistency': 0.10,
            'marks_consistency': 0.10,
            'academic_improvement': 0.10,
            'internship_exposure': 0.10,
            'ec_quality': 0.08,
            'cert_quality': 0.07
        }

    def score(self, student_data: Dict) -> Tuple[float, float, Dict]:
        """
        Calculate universal score.

        Args:
            student_data: dict with optional keys 'cgpa',
                'sgpa_sem1'..'sgpa_sem8', 'tenth_pct', 'twelfth_pct',
                'internship_text', 'extracurricular_text',
                'certifications_text'. Any key may be absent or None.

        Returns: (score, confidence, features_dict). All features lie in
            [0, 1]; confidence is the fraction of the 8 inputs usable.
        """
        features = {}

        # CGPA normalization (0-10 scale). `or 0` also covers an explicit
        # None value stored under the key, which previously raised a
        # TypeError in the division below (text fields already had this
        # guard, cgpa did not).
        cgpa = student_data.get('cgpa') or 0
        features['cgpa_norm'] = min(cgpa / 10.0, 1.0)

        # SGPA trend (improvement across semesters) - filter out null values
        sgpa_values = []
        for sem_num in range(1, 9):
            sem_val = student_data.get(f'sgpa_sem{sem_num}')
            if sem_val is not None and sem_val > 0:  # Ignore null/zero values
                sgpa_values.append(sem_val)

        if len(sgpa_values) >= 2:
            # Trend from first to last available semester, centered at 0.5
            # so a flat trajectory scores neutrally.
            trend = (sgpa_values[-1] - sgpa_values[0]) / 10.0
            features['sgpa_trend'] = max(0, min(trend + 0.5, 1.0))
        else:
            features['sgpa_trend'] = 0.5  # Neutral if insufficient data

        # SGPA consistency (lower std = more consistent = better)
        if len(sgpa_values) >= 3:
            std_dev = np.std(sgpa_values)
            features['sgpa_consistency'] = max(0, 1 - (std_dev / 3.0))
        else:
            features['sgpa_consistency'] = 0.5

        # Marks consistency across 10th, 12th and CGPA (as a percentage)
        tenth = student_data.get('tenth_pct')
        twelfth = student_data.get('twelfth_pct')

        if tenth and twelfth and cgpa:
            cgpa_pct = (cgpa / 10.0) * 100
            marks_std = np.std([tenth, twelfth, cgpa_pct])
            features['marks_consistency'] = max(0, 1 - (marks_std / 30.0))
        else:
            features['marks_consistency'] = 0.5

        # Academic improvement flag: rewards a monotonic rise
        # 10th -> 12th -> CGPA%, partial credit for a single rise.
        if tenth and twelfth and cgpa:
            cgpa_pct = (cgpa / 10.0) * 100
            if cgpa_pct > twelfth and twelfth > tenth:
                features['academic_improvement'] = 1.0
            elif cgpa_pct > twelfth or twelfth > tenth:
                features['academic_improvement'] = 0.7
            else:
                features['academic_improvement'] = 0.3
        else:
            features['academic_improvement'] = 0.5

        # Free-text answers (stored as None when unanswered).
        internship_text = student_data.get('internship_text') or ''
        ec_text = student_data.get('extracurricular_text') or ''
        cert_text = student_data.get('certifications_text') or ''

        # Keyword-heuristic quality estimates of each answer.
        features['internship_exposure'] = self._assess_internship_quality(internship_text)
        features['ec_quality'] = self._assess_extracurricular_quality(ec_text)
        features['cert_quality'] = self._assess_certification_quality(cert_text)

        # Weighted aggregate score.
        score = sum(features[k] * self.feature_weights[k] for k in features.keys())

        # Confidence = fraction of the 8 possible inputs that carried data.
        total_fields = 8
        filled_fields = sum([
            1 if cgpa > 0 else 0,
            1 if len(sgpa_values) >= 2 else 0,
            1 if len(sgpa_values) >= 3 else 0,
            1 if tenth and twelfth else 0,
            1 if tenth and twelfth and cgpa else 0,
            1 if len(internship_text) > 20 else 0,
            1 if len(ec_text) > 20 else 0,
            1 if len(cert_text) > 20 else 0
        ])
        confidence = filled_fields / total_fields

        return score, confidence, features

    def explain(self, features: Dict) -> Dict:
        """Generate explanation for scores.

        Returns the top 3 features above 0.6 as positives and the
        bottom 3 below 0.4 as negatives, each with a description.
        """
        explanations = {
            'top_positive_features': [],
            'top_negative_features': []
        }

        # Sort features by value, strongest first.
        sorted_features = sorted(features.items(), key=lambda x: x[1], reverse=True)

        # Top 3 positive (only those clearly above average).
        for feat, val in sorted_features[:3]:
            if val > 0.6:
                explanations['top_positive_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'description': self._get_feature_description(feat, val)
                })

        # Bottom 3 negative (only those clearly below average).
        for feat, val in sorted_features[-3:]:
            if val < 0.4:
                explanations['top_negative_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'description': self._get_feature_description(feat, val)
                })

        return explanations

    def _assess_internship_quality(self, text: str) -> float:
        """Estimate internship quality (0-1) from duration, activity
        keywords, and level of detail in the free-text answer."""
        if not text or len(text) < 20:
            return 0.0

        score = 0.0
        text_lower = text.lower()

        # Duration indicators: (regex, weight-per-unit or flat score).
        duration_patterns = [
            (r'\b(\d+)\s*months?\b', 1.0),
            (r'\b(\d+)\s*weeks?\b', 0.25),
            (r'summer\s+internship', 0.5),
            (r'year\s+long|full\s+year|annual', 1.0),
        ]

        max_duration_score = 0.0
        for pattern, multiplier in duration_patterns:
            matches = re.findall(pattern, text_lower)
            if matches:
                if pattern.startswith(r'\b(\d+)'):
                    # Numeric duration: convert to months and cap at 6.
                    duration = max([int(m) for m in matches]) * multiplier
                    max_duration_score = max(max_duration_score, min(duration / 6.0, 1.0))
                else:
                    # Phrase match contributes a flat score.
                    max_duration_score = max(max_duration_score, multiplier)

        score += max_duration_score * 0.4

        # Quality indicators: action/organization vocabulary.
        quality_keywords = ['company', 'startup', 'corporation', 'project', 'developed',
                            'implemented', 'built', 'deployed', 'managed', 'led']
        quality_count = sum(1 for kw in quality_keywords if kw in text_lower)
        score += min(quality_count / len(quality_keywords), 1.0) * 0.4

        # Longer answers indicate more detail.
        score += min(len(text) / 500, 1.0) * 0.2

        return min(score, 1.0)

    def _assess_extracurricular_quality(self, text: str) -> float:
        """Estimate extracurricular quality (0-1) from leadership and
        activity keywords plus level of detail."""
        if not text or len(text) < 20:
            return 0.0

        score = 0.0
        text_lower = text.lower()

        # Leadership indicators
        leadership_keywords = ['led', 'organized', 'president', 'captain', 'head',
                               'coordinator', 'managed', 'founded']
        leadership_count = sum(1 for kw in leadership_keywords if kw in text_lower)
        score += min(leadership_count / 3, 1.0) * 0.4

        # Breadth of activity types
        activity_keywords = ['club', 'society', 'competition', 'hackathon', 'event',
                             'volunteer', 'sports', 'cultural', 'technical']
        activity_count = sum(1 for kw in activity_keywords if kw in text_lower)
        score += min(activity_count / 4, 1.0) * 0.4

        # Detail level
        score += min(len(text) / 400, 1.0) * 0.2

        return min(score, 1.0)

    def _assess_certification_quality(self, text: str) -> float:
        """Estimate certification quality (0-1) from reputable-platform
        and technical-skill keywords plus level of detail."""
        if not text or len(text) < 20:
            return 0.0

        score = 0.0
        text_lower = text.lower()

        # Platform indicators (reputable sources)
        platform_keywords = ['coursera', 'udemy', 'edx', 'linkedin', 'google',
                             'microsoft', 'aws', 'azure', 'ibm', 'oracle']
        platform_count = sum(1 for kw in platform_keywords if kw in text_lower)
        score += min(platform_count / 3, 1.0) * 0.4

        # Technical skill coverage
        tech_keywords = ['python', 'java', 'machine learning', 'data science', 'cloud',
                         'programming', 'development', 'database', 'web', 'mobile']
        tech_count = sum(1 for kw in tech_keywords if kw in text_lower)
        score += min(tech_count / 4, 1.0) * 0.4

        # Detail level
        score += min(len(text) / 400, 1.0) * 0.2

        return min(score, 1.0)

    def _get_feature_description(self, feature: str, value: float) -> str:
        """Get human-readable description of a feature; falls back to the
        raw feature name for unknown keys."""
        descriptions = {
            'cgpa_norm': f"CGPA performance: {value*10:.1f}/10",
            'sgpa_trend': "Strong upward trend in semester grades" if value > 0.6 else "Declining semester grades",
            'sgpa_consistency': "Very consistent semester performance" if value > 0.7 else "Inconsistent semester performance",
            'marks_consistency': "Consistent performance across academics" if value > 0.7 else "Variable academic performance",
            'academic_improvement': "Clear improvement over time" if value > 0.7 else "Limited academic growth",
            'internship_exposure': "Strong internship experience" if value > 0.6 else "Limited internship exposure",
            'ec_quality': "Excellent extracurricular involvement" if value > 0.6 else "Limited extracurricular activities",
            'cert_quality': "Strong certification portfolio" if value > 0.6 else "Few professional certifications"
        }
        return descriptions.get(feature, feature)
|