Parthnuwal7 commited on
Commit
3d015cd
·
0 Parent(s):

Adding analytical content

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment
2
+ .env
3
+ .env.example
4
+ .env.local
5
+
6
+ # Virtual environments
7
+ .venv/
8
+ venv/
9
+ env/
10
+
11
+ # Python cache
12
+ __pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ *.pyo
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # Cached centroids
27
+ *.npz
28
+ aspect_centroids.npz
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Build tools are required to compile native wheels; apt lists are purged
# afterwards to keep the image slim.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the source tree so the pip layer is
# cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code
COPY . .

# Writable directory the app uses for cached artifacts at runtime.
RUN mkdir -p /app/cache

# HuggingFace Spaces routes traffic to port 7860; unbuffered output makes
# container logs appear immediately.
ENV PORT=7860
ENV PYTHONUNBUFFERED=1

EXPOSE 7860

CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FCT
3
+ emoji: 🦀
4
+ colorFrom: yellow
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: other
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main Flask application for Analytics Module"""
2
+ from flask import Flask
3
+ from flask_cors import CORS
4
+ from config import Config
5
+
6
+ # Initialize Flask app
7
+ app = Flask(__name__)
8
+ app.config.from_object(Config)
9
+ CORS(app)
10
+
11
+ # Register blueprints
12
+ from routes.students import students_bp
13
+ from routes.scoring import scoring_bp
14
+ from routes.domain import domain_bp
15
+
16
+ app.register_blueprint(students_bp, url_prefix='/api/analytics')
17
+ app.register_blueprint(scoring_bp, url_prefix='/api/analytics')
18
+ app.register_blueprint(domain_bp, url_prefix='/api/analytics')
19
+
20
+ # Health check
21
+ @app.route('/health', methods=['GET'])
22
+ def health_check():
23
+ return {'status': 'healthy', 'service': 'analytics-api'}, 200
24
+
25
+ @app.route('/', methods=['GET'])
26
+ def home():
27
+ return {
28
+ 'service': 'Student Profiling & Employability Scoring API',
29
+ 'version': '1.0.0',
30
+ 'endpoints': {
31
+ 'students': '/api/analytics/students',
32
+ 'personality': '/api/analytics/personality/<student_id>',
33
+ 'text': '/api/analytics/text/<student_id>',
34
+ 'score': '/api/analytics/score/<student_id>',
35
+ 'leaderboard': '/api/analytics/leaderboard',
36
+ 'domain': {
37
+ 'available': '/api/analytics/domain/available',
38
+ 'submit': '/api/analytics/students/<student_id>/domain-evidence',
39
+ 'get': '/api/analytics/students/<student_id>/domain-evidence',
40
+ 'delete': '/api/analytics/students/<student_id>/domain-evidence/<domain_type>'
41
+ }
42
+ }
43
+ }
44
+
45
+ if __name__ == '__main__':
46
+ import os
47
+ port = int(os.getenv('PORT', 7860)) # HuggingFace uses 7860
48
+ debug = os.getenv('DEBUG', 'False').lower() == 'true'
49
+
50
+ print(f"🚀 Analytics API starting on port {port}")
51
+ print(f"📊 Scoring modules: Universal, Personality, Text, Domain (Tech/Business/Creative/Research)")
52
+ print(f"🔗 Base URL: http://0.0.0.0:{port}")
53
+ app.run(host='0.0.0.0', port=port, debug=debug)
aspect_seeds.json ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "leadership": [
3
+ "led a team",
4
+ "was team lead",
5
+ "managed a project",
6
+ "supervised interns",
7
+ "coordinated a cross-functional team",
8
+ "organized the club",
9
+ "president of the society",
10
+ "captain of the team",
11
+ "ran weekly standups",
12
+ "delegated tasks",
13
+ "mentored junior members",
14
+ "headed the project",
15
+ "oversaw project timelines",
16
+ "chaired the committee",
17
+ "led end-to-end delivery",
18
+ "directed project milestones",
19
+ "led a 5-person team",
20
+ "managed stakeholders",
21
+ "took ownership of the initiative",
22
+ "led code reviews",
23
+ "organized campus events",
24
+ "led product demo sessions",
25
+ "led recruitment for volunteers",
26
+ "managed vendor relationships",
27
+ "spearheaded the outreach program"
28
+ ],
29
+ "technical_skills": [
30
+ "developed a web API",
31
+ "implemented RESTful services",
32
+ "coded in python",
33
+ "built machine learning models",
34
+ "trained neural networks",
35
+ "implemented data pipelines",
36
+ "used pandas for ETL",
37
+ "designed database schemas",
38
+ "built microservices",
39
+ "deployed models using docker",
40
+ "worked with FastAPI",
41
+ "implemented CI/CD",
42
+ "wrote unit tests",
43
+ "optimized SQL queries",
44
+ "used scikit-learn",
45
+ "developed recommendation systems",
46
+ "built feature engineering pipelines",
47
+ "deployed to cloud",
48
+ "developed ETL jobs",
49
+ "worked with Kafka",
50
+ "implemented caching layers",
51
+ "used TensorFlow or PyTorch",
52
+ "built backend services",
53
+ "wrote production-grade code",
54
+ "integrated third-party APIs"
55
+ ],
56
+ "problem_solving": [
57
+ "solved complex problem",
58
+ "debugged production issues",
59
+ "optimized an algorithm",
60
+ "reduced latency of service",
61
+ "designed a scalable solution",
62
+ "investigated root cause",
63
+ "improved system reliability",
64
+ "created a novel solution",
65
+ "troubleshot integration issues",
66
+ "automated manual tasks",
67
+ "reduced memory usage",
68
+ "resolved data pipeline failures",
69
+ "refactored critical code",
70
+ "handled edge cases",
71
+ "iterated on prototypes",
72
+ "performed A/B testing to decide",
73
+ "diagnosed performance bottlenecks",
74
+ "designed fallback strategies",
75
+ "resolved deployment failures",
76
+ "created monitoring & alerts"
77
+ ],
78
+ "internships_experience": [
79
+ "summer internship",
80
+ "industrial training",
81
+ "interned at",
82
+ "worked as an intern",
83
+ "internship project",
84
+ "internship in data science",
85
+ "interned at a startup",
86
+ "completed internship at",
87
+ "interned with the engineering team",
88
+ "intern experience",
89
+ "interned at an e-commerce company",
90
+ "industrial internship",
91
+ "co-op placement",
92
+ "paid internship",
93
+ "research internship",
94
+ "interned as a software engineer",
95
+ "on-the-job training",
96
+ "worked under mentor",
97
+ "internship-driven project",
98
+ "corporate internship"
99
+ ],
100
+ "communication": [
101
+ "presented to stakeholders",
102
+ "gave a presentation",
103
+ "wrote documentation",
104
+ "authored reports",
105
+ "explained results to non-technical",
106
+ "public speaking",
107
+ "delivered demo",
108
+ "prepared slides",
109
+ "wrote user guides",
110
+ "communicated with clients",
111
+ "collaborated across teams",
112
+ "conducted knowledge transfer",
113
+ "wrote clear emails",
114
+ "explained technical concepts",
115
+ "presented project outcomes",
116
+ "led demo sessions",
117
+ "created onboarding docs",
118
+ "contributed to team discussions",
119
+ "led workshops",
120
+ "hosted training sessions"
121
+ ],
122
+ "teamwork": [
123
+ "collaborated with team",
124
+ "worked in a cross-functional team",
125
+ "paired programming",
126
+ "contributed to group project",
127
+ "supported teammates",
128
+ "collaborated on design",
129
+ "worked with designers and PMs",
130
+ "helped teammates debug",
131
+ "co-authored project",
132
+ "mentored peers",
133
+ "shared responsibilities",
134
+ "worked effectively in group",
135
+ "contributed in agile team",
136
+ "participated in sprints",
137
+ "assisted in integration"
138
+ ],
139
+ "project_execution": [
140
+ "delivered project on time",
141
+ "met project deadlines",
142
+ "managed milestones",
143
+ "handled project planning",
144
+ "released production features",
145
+ "coordinated deployment",
146
+ "delivered MVP",
147
+ "tracked KPIs",
148
+ "managed scope",
149
+ "created project timeline",
150
+ "ran retrospectives",
151
+ "managed feature rollout",
152
+ "ensured on-time delivery",
153
+ "performed release validations",
154
+ "deployed analytics dashboard",
155
+ "iterated based on feedback"
156
+ ],
157
+ "initiative": [
158
+ "initiated a project",
159
+ "proposed a new idea",
160
+ "took initiative",
161
+ "started a side project",
162
+ "built a proof of concept",
163
+ "started a campus chapter",
164
+ "created an automation",
165
+ "improved an existing process",
166
+ "volunteered to lead",
167
+ "identified improvement areas",
168
+ "launched a mini-product",
169
+ "ran a pilot program",
170
+ "created onboarding scripts",
171
+ "led process improvements",
172
+ "started a mentoring circle"
173
+ ],
174
+ "learning_agility": [
175
+ "quick learner",
176
+ "self-taught",
177
+ "learned new framework",
178
+ "picked up new language",
179
+ "adapted to new tech",
180
+ "completed online courses",
181
+ "upskilled via projects",
182
+ "transitioned domains",
183
+ "learned on the job",
184
+ "rapidly onboarded",
185
+ "attended workshops",
186
+ "completed bootcamp",
187
+ "took certification courses",
188
+ "learned through documentation",
189
+ "scaled knowledge quickly",
190
+ "adapted to changing scope"
191
+ ],
192
+ "career_alignment": [
193
+ "career goal is",
194
+ "aspire to become",
195
+ "interested in data science",
196
+ "pursue a role in product",
197
+ "long-term goal",
198
+ "want to specialize in",
199
+ "career objective",
200
+ "planning to pursue masters",
201
+ "aim to work in industry",
202
+ "seek product management roles",
203
+ "interested in research",
204
+ "want to join a startup",
205
+ "targeting roles in ML engineering",
206
+ "aiming for consulting roles",
207
+ "career path is focused on"
208
+ ]
209
+ }
config.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration for Analytics Module"""
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
class Config:
    """Central application configuration, resolved once at import time.

    Values come from the environment (loaded via dotenv at module import);
    each attribute falls back to a development default.
    """

    # Supabase connection.
    # NOTE(review): a real project URL is hard-coded as the fallback — prefer
    # requiring SUPABASE_URL from the environment rather than shipping it in
    # source; verify this URL is not sensitive.
    SUPABASE_URL = os.getenv('SUPABASE_URL', 'https://hbesjuifeodgqrptpkch.supabase.co')
    SUPABASE_KEY = os.getenv('SUPABASE_KEY', '')

    # Sentence-transformers model used for text embeddings.
    SENTENCE_TRANSFORMER_MODEL = 'all-MiniLM-L6-v2'

    # Score-fusion weights; the three components are expected to sum to 1.0.
    UNIVERSAL_WEIGHT = 0.50
    PERSONALITY_WEIGHT = 0.25
    TEXT_WEIGHT = 0.25

    # Flask settings.
    # Fix: DEBUG was compared case-sensitively (== 'True'), so DEBUG=true in
    # the environment silently disabled debug mode. Parsed case-insensitively
    # now, consistent with app.py's `os.getenv('DEBUG', ...).lower() == 'true'`.
    DEBUG = os.getenv('DEBUG', 'True').lower() == 'true'
    PORT = int(os.getenv('PORT', 5001))
database/add_semester_columns.sql ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ -- Add missing semester columns to analytics_students table
2
+ -- Run this in Supabase SQL Editor
3
+
4
+ ALTER TABLE analytics_students
5
+ ADD COLUMN IF NOT EXISTS sgpa_sem1 REAL CHECK (sgpa_sem1 >= 0 AND sgpa_sem1 <= 10),
6
+ ADD COLUMN IF NOT EXISTS sgpa_sem2 REAL CHECK (sgpa_sem2 >= 0 AND sgpa_sem2 <= 10),
7
+ ADD COLUMN IF NOT EXISTS sgpa_sem3 REAL CHECK (sgpa_sem3 >= 0 AND sgpa_sem3 <= 10),
8
+ ADD COLUMN IF NOT EXISTS sgpa_sem7 REAL CHECK (sgpa_sem7 >= 0 AND sgpa_sem7 <= 10),
9
+ ADD COLUMN IF NOT EXISTS sgpa_sem8 REAL CHECK (sgpa_sem8 >= 0 AND sgpa_sem8 <= 10);
database/db.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Database connection and utilities"""
2
+ from supabase import create_client, Client
3
+ from config import Config
4
+
5
+ # Initialize Supabase client
6
+ supabase: Client = create_client(Config.SUPABASE_URL, Config.SUPABASE_KEY)
7
+
8
+ def get_db():
9
+ """Get Supabase client instance"""
10
+ return supabase
database/migrate_domain_module.sql ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Domain Module Migration Script
2
+ -- Run this in Supabase SQL Editor to add domain-specific scoring support
3
+ -- Date: December 9, 2025
4
+
5
-- ============================================================================
-- STEP 1: Add domain fields to existing analytics_students table
-- ============================================================================

-- Fix: the original CHECK used `IN (..., NULL)`. With a NULL literal in the
-- list, the IN predicate evaluates to NULL (not FALSE) for any non-listed
-- value, and SQL treats a NULL CHECK result as satisfied — so the constraint
-- rejected nothing. NULL column values already pass a CHECK constraint, so
-- the NULL literal is simply dropped.
ALTER TABLE analytics_students
ADD COLUMN IF NOT EXISTS active_domain TEXT CHECK (active_domain IN ('tech', 'business', 'creative', 'research')),
ADD COLUMN IF NOT EXISTS domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
ADD COLUMN IF NOT EXISTS domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1);
13
+
14
+ -- ============================================================================
15
+ -- STEP 2: Create domain evidence table
16
+ -- ============================================================================
17
+
18
+ CREATE TABLE IF NOT EXISTS analytics_domain_evidence (
19
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
20
+ student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
21
+ domain_type TEXT NOT NULL CHECK (domain_type IN ('tech', 'business', 'creative', 'research')),
22
+ evidence_data JSONB NOT NULL,
23
+ domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
24
+ domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1),
25
+ raw_features JSONB,
26
+ processing_status TEXT DEFAULT 'pending' CHECK (processing_status IN ('pending', 'processing', 'completed', 'failed')),
27
+ error_message TEXT,
28
+ created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
29
+ updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
30
+ UNIQUE(student_id, domain_type)
31
+ );
32
+
33
+ -- ============================================================================
34
+ -- STEP 3: Create indexes for performance
35
+ -- ============================================================================
36
+
37
+ CREATE INDEX IF NOT EXISTS idx_domain_evidence_student ON analytics_domain_evidence(student_id);
38
+ CREATE INDEX IF NOT EXISTS idx_domain_evidence_type ON analytics_domain_evidence(domain_type);
39
+ CREATE INDEX IF NOT EXISTS idx_domain_evidence_status ON analytics_domain_evidence(processing_status);
40
+
41
+ -- ============================================================================
42
+ -- STEP 4: Enable Row Level Security
43
+ -- ============================================================================
44
+
45
+ ALTER TABLE analytics_domain_evidence ENABLE ROW LEVEL SECURITY;
46
+
47
+ -- ============================================================================
48
+ -- STEP 5: Create RLS Policies for domain evidence
49
+ -- ============================================================================
50
+
51
+ -- Users can view their own domain evidence
52
+ CREATE POLICY "Users can view own domain evidence"
53
+ ON analytics_domain_evidence FOR SELECT
54
+ TO authenticated
55
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
56
+
57
+ -- Users can insert their own domain evidence
58
+ CREATE POLICY "Users can insert own domain evidence"
59
+ ON analytics_domain_evidence FOR INSERT
60
+ TO authenticated
61
+ WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
62
+
63
+ -- Users can update their own domain evidence
64
+ CREATE POLICY "Users can update own domain evidence"
65
+ ON analytics_domain_evidence FOR UPDATE
66
+ TO authenticated
67
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
68
+
69
+ -- Users can delete their own domain evidence
70
+ CREATE POLICY "Users can delete own domain evidence"
71
+ ON analytics_domain_evidence FOR DELETE
72
+ TO authenticated
73
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
74
+
75
+ -- ============================================================================
76
+ -- STEP 6: Verification queries (run these to verify successful migration)
77
+ -- ============================================================================
78
+
79
+ -- Check if columns were added
80
+ SELECT column_name, data_type
81
+ FROM information_schema.columns
82
+ WHERE table_name = 'analytics_students'
83
+ AND column_name IN ('active_domain', 'domain_score', 'domain_confidence');
84
+
85
+ -- Check if table was created
86
+ SELECT table_name
87
+ FROM information_schema.tables
88
+ WHERE table_name = 'analytics_domain_evidence';
89
+
90
+ -- Check if indexes were created
91
+ SELECT indexname
92
+ FROM pg_indexes
93
+ WHERE tablename = 'analytics_domain_evidence';
94
+
95
+ -- Check if RLS policies were created
96
+ SELECT policyname
97
+ FROM pg_policies
98
+ WHERE tablename = 'analytics_domain_evidence';
99
+
100
+ -- ============================================================================
101
+ -- Migration Complete!
102
+ -- ============================================================================
103
+
104
+ -- Expected results:
105
+ -- ✓ 3 new columns in analytics_students table
106
+ -- ✓ 1 new table: analytics_domain_evidence
107
+ -- ✓ 3 new indexes
108
+ -- ✓ 4 new RLS policies
109
+
110
+ -- Next steps:
111
+ -- 1. Restart your Flask backend: python app.py
112
+ -- 2. Test domain submission via API or frontend form
113
+ -- 3. Verify score fusion includes domain component
database/migrate_to_text_fields.sql ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Migration: Replace numeric fields with text fields for extracurricular, certifications, and internships
2
+ -- Run this in Supabase SQL Editor
3
+
4
+ -- Add new text columns
5
+ ALTER TABLE analytics_students
6
+ ADD COLUMN IF NOT EXISTS extracurricular_text TEXT,
7
+ ADD COLUMN IF NOT EXISTS certifications_text TEXT,
8
+ ADD COLUMN IF NOT EXISTS internship_text TEXT;
9
+
10
+ -- Optional: Drop old numeric columns if you want to clean up
11
+ -- Uncomment these lines after verifying the new text fields work
12
+ -- ALTER TABLE analytics_students DROP COLUMN IF EXISTS extracurricular_count;
13
+ -- ALTER TABLE analytics_students DROP COLUMN IF EXISTS certifications_count;
14
+ -- ALTER TABLE analytics_students DROP COLUMN IF EXISTS internship_total_months;
15
+
16
+ -- Note: If you want to keep both old and new columns during transition,
17
+ -- you can skip dropping the old columns and they will coexist.
database/schema.sql ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Analytics Module Schema for Supabase
2
+ -- Run this in Supabase SQL Editor
3
+
4
-- 1. Students Table
CREATE TABLE IF NOT EXISTS analytics_students (
    student_id TEXT PRIMARY KEY,
    user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
    cgpa REAL NOT NULL CHECK (cgpa >= 0 AND cgpa <= 10),
    sgpa_sem1 REAL CHECK (sgpa_sem1 >= 0 AND sgpa_sem1 <= 10),
    sgpa_sem2 REAL CHECK (sgpa_sem2 >= 0 AND sgpa_sem2 <= 10),
    sgpa_sem3 REAL CHECK (sgpa_sem3 >= 0 AND sgpa_sem3 <= 10),
    sgpa_sem4 REAL CHECK (sgpa_sem4 >= 0 AND sgpa_sem4 <= 10),
    sgpa_sem5 REAL CHECK (sgpa_sem5 >= 0 AND sgpa_sem5 <= 10),
    sgpa_sem6 REAL CHECK (sgpa_sem6 >= 0 AND sgpa_sem6 <= 10),
    sgpa_sem7 REAL CHECK (sgpa_sem7 >= 0 AND sgpa_sem7 <= 10),
    sgpa_sem8 REAL CHECK (sgpa_sem8 >= 0 AND sgpa_sem8 <= 10),
    tenth_pct REAL CHECK (tenth_pct >= 0 AND tenth_pct <= 100),
    twelfth_pct REAL CHECK (twelfth_pct >= 0 AND twelfth_pct <= 100),
    extracurricular_text TEXT,
    certifications_text TEXT,
    internship_text TEXT,
    -- Fix: the original constraint was `IN (..., NULL)`; the NULL literal
    -- makes the predicate evaluate to NULL for any invalid value, and a NULL
    -- CHECK result is treated as satisfied — so the constraint rejected
    -- nothing. NULL values already pass a CHECK, so the literal is removed.
    active_domain TEXT CHECK (active_domain IN ('tech', 'business', 'creative', 'research')),
    domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
    domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
28
+
29
+ -- 2. Personality Responses Table
30
+ CREATE TABLE IF NOT EXISTS analytics_personality_responses (
31
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
32
+ student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
33
+ p_q1 INTEGER CHECK (p_q1 >= 1 AND p_q1 <= 5),
34
+ p_q2 INTEGER CHECK (p_q2 >= 1 AND p_q2 <= 5),
35
+ p_q3 INTEGER CHECK (p_q3 >= 1 AND p_q3 <= 5),
36
+ p_q4 INTEGER CHECK (p_q4 >= 1 AND p_q4 <= 5),
37
+ p_q5 INTEGER CHECK (p_q5 >= 1 AND p_q5 <= 5),
38
+ p_q6 INTEGER CHECK (p_q6 >= 1 AND p_q6 <= 5),
39
+ p_q7 INTEGER CHECK (p_q7 >= 1 AND p_q7 <= 5),
40
+ p_q8 INTEGER CHECK (p_q8 >= 1 AND p_q8 <= 5),
41
+ p_q9 INTEGER CHECK (p_q9 >= 1 AND p_q9 <= 5),
42
+ p_q10 INTEGER CHECK (p_q10 >= 1 AND p_q10 <= 5),
43
+ p_q11 INTEGER CHECK (p_q11 >= 1 AND p_q11 <= 5),
44
+ p_q12 INTEGER CHECK (p_q12 >= 1 AND p_q12 <= 5),
45
+ p_q13 INTEGER CHECK (p_q13 >= 1 AND p_q13 <= 5),
46
+ p_q14 INTEGER CHECK (p_q14 >= 1 AND p_q14 <= 5),
47
+ p_q15 INTEGER CHECK (p_q15 >= 1 AND p_q15 <= 5),
48
+ p_q16 INTEGER CHECK (p_q16 >= 1 AND p_q16 <= 5),
49
+ p_q17 INTEGER CHECK (p_q17 >= 1 AND p_q17 <= 5),
50
+ p_q18 INTEGER CHECK (p_q18 >= 1 AND p_q18 <= 5),
51
+ p_q19 INTEGER CHECK (p_q19 >= 1 AND p_q19 <= 5),
52
+ p_q20 INTEGER CHECK (p_q20 >= 1 AND p_q20 <= 5),
53
+ created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
54
+ UNIQUE(student_id)
55
+ );
56
+
57
+ -- 3. Text Responses Table
58
+ CREATE TABLE IF NOT EXISTS analytics_text_responses (
59
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
60
+ student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
61
+ text_q1 TEXT NOT NULL, -- Strengths
62
+ text_q2 TEXT NOT NULL, -- Career interests
63
+ text_q3 TEXT NOT NULL, -- Extracurriculars + leadership
64
+ created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
65
+ UNIQUE(student_id)
66
+ );
67
+
68
+ -- 4. Domain Evidence Table
69
+ CREATE TABLE IF NOT EXISTS analytics_domain_evidence (
70
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
71
+ student_id TEXT REFERENCES analytics_students(student_id) ON DELETE CASCADE,
72
+ domain_type TEXT NOT NULL CHECK (domain_type IN ('tech', 'business', 'creative', 'research')),
73
+ evidence_data JSONB NOT NULL, -- Flexible storage for domain-specific inputs
74
+ domain_score REAL CHECK (domain_score >= 0 AND domain_score <= 1),
75
+ domain_confidence REAL CHECK (domain_confidence >= 0 AND domain_confidence <= 1),
76
+ raw_features JSONB, -- Raw feature values for explainability
77
+ processing_status TEXT DEFAULT 'pending' CHECK (processing_status IN ('pending', 'processing', 'completed', 'failed')),
78
+ error_message TEXT,
79
+ created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
80
+ updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
81
+ UNIQUE(student_id, domain_type)
82
+ );
83
+
84
+ -- Indexes
85
+ CREATE INDEX IF NOT EXISTS idx_analytics_students_user_id ON analytics_students(user_id);
86
+ CREATE INDEX IF NOT EXISTS idx_personality_student ON analytics_personality_responses(student_id);
87
+ CREATE INDEX IF NOT EXISTS idx_text_student ON analytics_text_responses(student_id);
88
+ CREATE INDEX IF NOT EXISTS idx_domain_evidence_student ON analytics_domain_evidence(student_id);
89
+ CREATE INDEX IF NOT EXISTS idx_domain_evidence_type ON analytics_domain_evidence(domain_type);
90
+ CREATE INDEX IF NOT EXISTS idx_domain_evidence_status ON analytics_domain_evidence(processing_status);
91
+
92
+ -- RLS Policies
93
+ ALTER TABLE analytics_students ENABLE ROW LEVEL SECURITY;
94
+ ALTER TABLE analytics_personality_responses ENABLE ROW LEVEL SECURITY;
95
+ ALTER TABLE analytics_text_responses ENABLE ROW LEVEL SECURITY;
96
+
97
+ -- Students can view/update their own data
98
+ CREATE POLICY "Users can view own analytics data"
99
+ ON analytics_students FOR SELECT
100
+ TO authenticated
101
+ USING (user_id = auth.uid());
102
+
103
+ CREATE POLICY "Users can insert own analytics data"
104
+ ON analytics_students FOR INSERT
105
+ TO authenticated
106
+ WITH CHECK (user_id = auth.uid());
107
+
108
+ CREATE POLICY "Users can update own analytics data"
109
+ ON analytics_students FOR UPDATE
110
+ TO authenticated
111
+ USING (user_id = auth.uid());
112
+
113
+ -- Personality responses
114
+ CREATE POLICY "Users can view own personality responses"
115
+ ON analytics_personality_responses FOR SELECT
116
+ TO authenticated
117
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
118
+
119
+ CREATE POLICY "Users can insert own personality responses"
120
+ ON analytics_personality_responses FOR INSERT
121
+ -- Text responses
122
+ CREATE POLICY "Users can view own text responses"
123
+ ON analytics_text_responses FOR SELECT
124
+ TO authenticated
125
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
126
+
127
+ CREATE POLICY "Users can insert own text responses"
128
+ ON analytics_text_responses FOR INSERT
129
+ TO authenticated
130
+ WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
131
+
132
+ -- Domain evidence
133
+ ALTER TABLE analytics_domain_evidence ENABLE ROW LEVEL SECURITY;
134
+
135
+ CREATE POLICY "Users can view own domain evidence"
136
+ ON analytics_domain_evidence FOR SELECT
137
+ TO authenticated
138
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
139
+
140
+ CREATE POLICY "Users can insert own domain evidence"
141
+ ON analytics_domain_evidence FOR INSERT
142
+ TO authenticated
143
+ WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
144
+
145
+ CREATE POLICY "Users can update own domain evidence"
146
+ ON analytics_domain_evidence FOR UPDATE
147
+ TO authenticated
148
+ USING (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
149
+ ON analytics_text_responses FOR INSERT
150
+ TO authenticated
151
+ WITH CHECK (student_id IN (SELECT student_id FROM analytics_students WHERE user_id = auth.uid()));
domains/data_science.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "domain_id": "data_science",
3
+ "display_name": "Data Science & Analytics",
4
+ "description": "Machine Learning, Data Analysis, AI Research, and Business Intelligence",
5
+ "core_skills": [
6
+ "python",
7
+ "r",
8
+ "sql",
9
+ "pandas",
10
+ "numpy",
11
+ "scikit_learn",
12
+ "tensorflow",
13
+ "pytorch",
14
+ "keras",
15
+ "xgboost",
16
+ "tableau",
17
+ "power_bi",
18
+ "matplotlib",
19
+ "seaborn",
20
+ "statistics",
21
+ "ab_testing",
22
+ "feature_engineering",
23
+ "spark",
24
+ "hadoop",
25
+ "airflow",
26
+ "dbt"
27
+ ],
28
+ "aspect_prototypes": {
29
+ "technical_skills": [
30
+ "built machine learning models using scikit-learn and XGBoost",
31
+ "developed deep learning pipelines with PyTorch",
32
+ "created ETL jobs using PySpark for big data processing",
33
+ "trained neural networks for image classification",
34
+ "implemented NLP models using transformers and BERT",
35
+ "designed feature engineering pipelines for ML",
36
+ "built recommendation systems using collaborative filtering",
37
+ "deployed ML models to production with MLflow",
38
+ "created interactive dashboards in Tableau",
39
+ "performed A/B testing with statistical significance analysis"
40
+ ],
41
+ "problem_solving": [
42
+ "improved model accuracy from 78% to 92% through feature engineering",
43
+ "reduced model training time by 60% using distributed computing",
44
+ "diagnosed and fixed data leakage in ML pipeline",
45
+ "optimized hyperparameters using Bayesian optimization",
46
+ "handled class imbalance with SMOTE and weighted sampling",
47
+ "debugged data quality issues affecting model performance",
48
+ "designed experiment to measure causal impact of recommendation",
49
+ "created automated anomaly detection system",
50
+ "resolved data drift issues in production models",
51
+ "built interpretable models for regulatory compliance"
52
+ ],
53
+ "leadership": [
54
+ "led data science team of 4 on personalization project",
55
+ "presented ML insights to C-level stakeholders",
56
+ "coordinated with engineering for model deployment",
57
+ "organized data science reading group in company",
58
+ "mentored junior analysts on SQL and Python",
59
+ "drove adoption of MLOps best practices",
60
+ "led cross-functional project with marketing team",
61
+ "managed data labeling team for annotation project",
62
+ "conducted training sessions on Pandas for analysts",
63
+ "championed experiment-driven decision making culture"
64
+ ],
65
+ "internship_experience": [
66
+ "data science intern at Flipkart building recommendation models",
67
+ "ML research intern at Google Brain working on NLP",
68
+ "analytics intern at McKinsey for retail optimization",
69
+ "AI intern at NVIDIA on computer vision projects",
70
+ "research intern at IISc on deep learning",
71
+ "data analyst intern at Zomato for demand forecasting",
72
+ "business intelligence intern at Amazon building dashboards",
73
+ "ML platform intern at Meta for model serving",
74
+ "quantitative research intern at Goldman Sachs",
75
+ "applied scientist intern at AWS on personalization"
76
+ ]
77
+ },
78
+ "industry_benchmarks": {
79
+ "min_employability_score": 0.65,
80
+ "expected_cgpa": 8.0,
81
+ "expected_internship_months": 4,
82
+ "critical_skills": [
83
+ "python",
84
+ "sql",
85
+ "statistics",
86
+ "ml_fundamentals"
87
+ ],
88
+ "nice_to_have_skills": [
89
+ "deep_learning",
90
+ "spark",
91
+ "mlops",
92
+ "cloud"
93
+ ]
94
+ },
95
+ "skill_gaps_mapping": {
96
+ "deep_learning": {
97
+ "demand_score": 0.80,
98
+ "courses": [
99
+ "Deep Learning Specialization",
100
+ "Fast.ai",
101
+ "Stanford CS231n"
102
+ ],
103
+ "certifications": [
104
+ "TensorFlow Developer",
105
+ "PyTorch Certified"
106
+ ]
107
+ },
108
+ "mlops": {
109
+ "demand_score": 0.75,
110
+ "courses": [
111
+ "MLOps Specialization",
112
+ "ML Engineering for Production"
113
+ ],
114
+ "certifications": [
115
+ "AWS ML Specialty",
116
+ "GCP ML Engineer"
117
+ ]
118
+ },
119
+ "statistics": {
120
+ "demand_score": 0.70,
121
+ "courses": [
122
+ "Statistics for Data Science",
123
+ "A/B Testing Masterclass"
124
+ ],
125
+ "certifications": []
126
+ },
127
+ "big_data": {
128
+ "demand_score": 0.65,
129
+ "courses": [
130
+ "Spark for Data Engineering",
131
+ "Databricks Academy"
132
+ ],
133
+ "certifications": [
134
+ "Databricks Certified",
135
+ "Cloudera CCA"
136
+ ]
137
+ }
138
+ },
139
+ "detection_keywords": [
140
+ "data science",
141
+ "machine learning",
142
+ "deep learning",
143
+ "ai",
144
+ "analytics",
145
+ "data analyst",
146
+ "ml engineer",
147
+ "research scientist",
148
+ "business intelligence",
149
+ "statistical modeling",
150
+ "predictive analytics",
151
+ "data mining"
152
+ ]
153
+ }
domains/mechanical_engineering.json ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "domain_id": "mechanical_engineering",
3
+ "display_name": "Mechanical Engineering",
4
+ "description": "Design, Manufacturing, Automotive, and Core Engineering",
5
+ "core_skills": [
6
+ "autocad",
7
+ "solidworks",
8
+ "catia",
9
+ "ansys",
10
+ "matlab",
11
+ "thermodynamics",
12
+ "fluid_mechanics",
13
+ "heat_transfer",
14
+ "manufacturing",
15
+ "cnc",
16
+ "3d_printing",
17
+ "gd_t",
18
+ "fea",
19
+ "cfd",
20
+ "product_design",
21
+ "quality_control"
22
+ ],
23
+ "aspect_prototypes": {
24
+ "technical_skills": [
25
+ "designed complex assemblies in SolidWorks and CATIA",
26
+ "performed FEA analysis using ANSYS for structural optimization",
27
+ "created CFD simulations for fluid flow optimization",
28
+ "developed CNC programs for precision machining",
29
+ "implemented GD&T for manufacturing tolerances",
30
+ "designed heat exchangers using thermal analysis",
31
+ "prototyped parts using 3D printing and rapid prototyping",
32
+ "conducted DFMEA for product reliability",
33
+ "created engineering drawings following ASME standards",
34
+ "optimized product design reducing weight by 20%"
35
+ ],
36
+ "problem_solving": [
37
+ "resolved vibration issue in rotating machinery",
38
+ "optimized manufacturing process reducing cycle time by 30%",
39
+ "diagnosed failure mode using root cause analysis",
40
+ "redesigned component eliminating stress concentration",
41
+ "improved product yield from 85% to 98% through quality control",
42
+ "solved thermal management problem in electronic enclosure",
43
+ "reduced material waste by 25% through lean manufacturing",
44
+ "fixed tolerance stack-up issue causing assembly problems",
45
+ "automated inspection process using machine vision",
46
+ "designed jig and fixture reducing setup time"
47
+ ],
48
+ "leadership": [
49
+ "led BAJA SAE team of 20 members as captain",
50
+ "managed product development project from concept to production",
51
+ "coordinated with suppliers for component sourcing",
52
+ "organized SAE chapter events with 200+ participants",
53
+ "mentored junior designers on CAD and simulation tools",
54
+ "led quality improvement initiative on production floor",
55
+ "managed cross-functional team for product launch",
56
+ "conducted design reviews with stakeholders",
57
+ "led vendor qualification and development program",
58
+ "organized technical workshops on new manufacturing methods"
59
+ ],
60
+ "internship_experience": [
61
+ "6 months design intern at Tata Motors in R&D division",
62
+ "summer internship at Mahindra on EV powertrain",
63
+ "manufacturing intern at L&T in heavy engineering",
64
+ "R&D intern at Bosch on automotive components",
65
+ "product design intern at Godrej appliances division",
66
+ "quality engineering intern at Maruti Suzuki",
67
+ "CAE analyst intern at TAFE for tractor design",
68
+ "tool design intern at Hero MotoCorp",
69
+ "thermal analysis intern at Thermax",
70
+ "research intern at IIT Madras on composite materials"
71
+ ]
72
+ },
73
+ "industry_benchmarks": {
74
+ "min_employability_score": 0.55,
75
+ "expected_cgpa": 7.0,
76
+ "expected_internship_months": 3,
77
+ "critical_skills": [
78
+ "cad",
79
+ "manufacturing_basics",
80
+ "engineering_drawing"
81
+ ],
82
+ "nice_to_have_skills": [
83
+ "fea",
84
+ "cfd",
85
+ "python",
86
+ "automation"
87
+ ]
88
+ },
89
+ "skill_gaps_mapping": {
90
+ "cae_simulation": {
91
+ "demand_score": 0.70,
92
+ "courses": [
93
+ "ANSYS Certification",
94
+ "CATIA V5 Mastery"
95
+ ],
96
+ "certifications": [
97
+ "CSWA",
98
+ "CSWP",
99
+ "ANSYS Certified"
100
+ ]
101
+ },
102
+ "ev_powertrain": {
103
+ "demand_score": 0.75,
104
+ "courses": [
105
+ "Electric Vehicle Technology",
106
+ "Battery Management Systems"
107
+ ],
108
+ "certifications": [
109
+ "EV Design Certification"
110
+ ]
111
+ },
112
+ "automation": {
113
+ "demand_score": 0.65,
114
+ "courses": [
115
+ "Industrial Automation",
116
+ "PLC Programming"
117
+ ],
118
+ "certifications": [
119
+ "Siemens TIA Portal",
120
+ "Allen Bradley"
121
+ ]
122
+ },
123
+ "industry_4_0": {
124
+ "demand_score": 0.60,
125
+ "courses": [
126
+ "IoT for Manufacturing",
127
+ "Digital Twin Technology"
128
+ ],
129
+ "certifications": [
130
+ "Industry 4.0 Certification"
131
+ ]
132
+ }
133
+ },
134
+ "detection_keywords": [
135
+ "mechanical",
136
+ "design engineer",
137
+ "manufacturing",
138
+ "automotive",
139
+ "product design",
140
+ "cad",
141
+ "solidworks",
142
+ "catia",
143
+ "ansys",
144
+ "thermodynamics",
145
+ "heat transfer",
146
+ "fluid mechanics"
147
+ ]
148
+ }
domains/software_engineering.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "domain_id": "software_engineering",
3
+ "display_name": "Software Engineering",
4
+ "description": "Backend, Frontend, Full-stack, DevOps, and general software development",
5
+ "core_skills": [
6
+ "python",
7
+ "java",
8
+ "javascript",
9
+ "typescript",
10
+ "golang",
11
+ "c++",
12
+ "react",
13
+ "nodejs",
14
+ "django",
15
+ "spring",
16
+ "fastapi",
17
+ "flask",
18
+ "sql",
19
+ "postgresql",
20
+ "mongodb",
21
+ "redis",
22
+ "docker",
23
+ "kubernetes",
24
+ "aws",
25
+ "gcp",
26
+ "azure",
27
+ "git",
28
+ "ci_cd",
29
+ "testing",
30
+ "system_design"
31
+ ],
32
+ "aspect_prototypes": {
33
+ "technical_skills": [
34
+ "developed RESTful APIs using FastAPI",
35
+ "built microservices architecture with Docker",
36
+ "implemented CI/CD pipelines for automated deployment",
37
+ "designed database schemas for high-traffic applications",
38
+ "wrote production-grade Python code with unit tests",
39
+ "deployed applications to AWS using ECS and Lambda",
40
+ "built React frontend with Redux state management",
41
+ "optimized SQL queries reducing latency by 50%",
42
+ "implemented caching layer with Redis",
43
+ "created data pipelines using Apache Airflow"
44
+ ],
45
+ "problem_solving": [
46
+ "debugged production outage affecting 10K users",
47
+ "optimized algorithm complexity from O(n²) to O(n log n)",
48
+ "resolved memory leak in long-running service",
49
+ "diagnosed and fixed race condition in concurrent code",
50
+ "designed fallback strategy for external API failures",
51
+ "reduced P95 latency from 500ms to 100ms",
52
+ "automated manual deployment reducing errors by 80%",
53
+ "created monitoring dashboards to detect issues early",
54
+ "refactored legacy codebase improving maintainability",
55
+ "implemented retry logic with exponential backoff"
56
+ ],
57
+ "leadership": [
58
+ "led a team of 5 engineers on product launch",
59
+ "managed sprint planning and backlog prioritization",
60
+ "conducted code reviews for junior developers",
61
+ "organized weekly tech talks for knowledge sharing",
62
+ "coordinated cross-team integration project",
63
+ "mentored 3 interns during summer program",
64
+ "drove architectural decisions for new microservice",
65
+ "led incident response during production outage",
66
+ "facilitated retrospectives improving team velocity",
67
+ "championed adoption of testing best practices"
68
+ ],
69
+ "internship_experience": [
70
+ "6 months SWE intern at Google building recommendation systems",
71
+ "summer internship at Microsoft on Azure DevOps team",
72
+ "3 months ML intern at startup developing NLP models",
73
+ "backend engineering intern at Stripe working on payments",
74
+ "full-stack intern at Flipkart building seller dashboard",
75
+ "DevOps intern at Infosys implementing CI/CD",
76
+ "research intern at IIT Bombay on distributed systems",
77
+ "mobile development intern at Zomato for Android app",
78
+ "data engineering intern at Razorpay building pipelines",
79
+ "platform intern at Amazon working on internal tools"
80
+ ]
81
+ },
82
+ "industry_benchmarks": {
83
+ "min_employability_score": 0.60,
84
+ "expected_cgpa": 7.5,
85
+ "expected_internship_months": 4,
86
+ "critical_skills": [
87
+ "python",
88
+ "sql",
89
+ "git",
90
+ "system_design"
91
+ ],
92
+ "nice_to_have_skills": [
93
+ "kubernetes",
94
+ "aws",
95
+ "redis",
96
+ "graphql"
97
+ ]
98
+ },
99
+ "skill_gaps_mapping": {
100
+ "cloud": {
101
+ "demand_score": 0.85,
102
+ "courses": [
103
+ "AWS Solutions Architect",
104
+ "GCP Professional",
105
+ "Azure Fundamentals"
106
+ ],
107
+ "certifications": [
108
+ "AWS SAA",
109
+ "GCP ACE",
110
+ "Azure AZ-900"
111
+ ]
112
+ },
113
+ "devops": {
114
+ "demand_score": 0.80,
115
+ "courses": [
116
+ "Docker Mastery",
117
+ "Kubernetes for Developers",
118
+ "CI/CD with GitHub Actions"
119
+ ],
120
+ "certifications": [
121
+ "CKA",
122
+ "Docker DCA",
123
+ "Jenkins Certified"
124
+ ]
125
+ },
126
+ "system_design": {
127
+ "demand_score": 0.75,
128
+ "courses": [
129
+ "Grokking System Design",
130
+ "Designing Data-Intensive Applications"
131
+ ],
132
+ "certifications": []
133
+ },
134
+ "dsa": {
135
+ "demand_score": 0.70,
136
+ "courses": [
137
+ "LeetCode Patterns",
138
+ "AlgoExpert",
139
+ "NeetCode 150"
140
+ ],
141
+ "certifications": []
142
+ }
143
+ },
144
+ "detection_keywords": [
145
+ "software",
146
+ "developer",
147
+ "engineer",
148
+ "backend",
149
+ "frontend",
150
+ "fullstack",
151
+ "web development",
152
+ "api",
153
+ "microservice",
154
+ "devops",
155
+ "sre",
156
+ "platform",
157
+ "coding",
158
+ "programming",
159
+ "python developer",
160
+ "java developer"
161
+ ]
162
+ }
models/personality_responses.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Personality responses model"""
2
+ from typing import Dict, List
3
+ from dataclasses import dataclass
4
+
5
@dataclass
class PersonalityResponses:
    """Likert-scale (1-5) answers to the 20 Big Five personality questions."""

    student_id: str
    # Maps question id ("p_q1" .. "p_q20") to a 1-5 Likert rating.
    responses: Dict[str, int]

    def to_dict(self):
        """Flatten into a single dict: student_id plus one key per answer."""
        return {'student_id': self.student_id, **self.responses}

    @staticmethod
    def get_questions() -> List[Dict[str, str]]:
        """Return 20 curated personality questions mapped to Big Five traits.

        A trait name suffixed with "_r" marks a reverse-scored item.
        """
        items = [
            # Openness (4 questions)
            ("p_q1", "I enjoy exploring new ideas and concepts", "openness"),
            ("p_q2", "I prefer routine over spontaneity", "openness_r"),
            ("p_q3", "I am curious about many different things", "openness"),
            ("p_q4", "I appreciate art and creative expression", "openness"),
            # Conscientiousness (4 questions)
            ("p_q5", "I am highly organized and plan ahead", "conscientiousness"),
            ("p_q6", "I often procrastinate on tasks", "conscientiousness_r"),
            ("p_q7", "I pay attention to details", "conscientiousness"),
            ("p_q8", "I complete tasks on time", "conscientiousness"),
            # Extraversion (4 questions)
            ("p_q9", "I enjoy being the center of attention", "extraversion"),
            ("p_q10", "I prefer working alone", "extraversion_r"),
            ("p_q11", "I make friends easily", "extraversion"),
            ("p_q12", "I am energized by social interactions", "extraversion"),
            # Agreeableness (4 questions)
            ("p_q13", "I am considerate of others' feelings", "agreeableness"),
            ("p_q14", "I prefer competition over collaboration", "agreeableness_r"),
            ("p_q15", "I trust people easily", "agreeableness"),
            ("p_q16", "I help others when they need it", "agreeableness"),
            # Emotional Stability (4 questions)
            ("p_q17", "I handle stress well", "stability"),
            ("p_q18", "I often feel anxious", "stability_r"),
            ("p_q19", "I remain calm under pressure", "stability"),
            ("p_q20", "I recover quickly from setbacks", "stability"),
        ]
        return [{"id": qid, "text": text, "trait": trait} for qid, text, trait in items]
models/student.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Student data model"""
2
+ from typing import Optional
3
+ from dataclasses import dataclass
4
+
5
@dataclass
class Student:
    """Academic profile for a single student.

    CGPA/SGPA fields appear to use a 10-point scale; ``tenth_pct`` and
    ``twelfth_pct`` are board-exam percentages (presumably 0-100 — confirm
    against callers).  The trailing free-text fields are optional
    self-reported descriptions consumed by downstream text scoring.
    """

    student_id: str
    user_id: Optional[str]
    cgpa: float
    sgpa_sem1: Optional[float]
    sgpa_sem2: Optional[float]
    sgpa_sem3: Optional[float]
    sgpa_sem4: Optional[float]
    sgpa_sem5: Optional[float]
    sgpa_sem6: Optional[float]
    sgpa_sem7: Optional[float]
    sgpa_sem8: Optional[float]
    tenth_pct: Optional[float]
    twelfth_pct: Optional[float]
    extracurricular_text: Optional[str] = None
    certifications_text: Optional[str] = None
    internship_text: Optional[str] = None

    def to_dict(self):
        """Return a plain dict of every field, keyed by field name.

        Uses ``dataclasses.asdict`` instead of listing each of the 16 fields
        by hand, so the mapping cannot drift out of sync when fields are
        added or renamed.
        """
        from dataclasses import asdict  # local: module top only imports `dataclass`
        return asdict(self)
models/text_responses.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text responses model"""
2
+ from typing import List, Dict
3
+ from dataclasses import dataclass
4
+
5
@dataclass
class TextResponses:
    """Free-text answers to the three open-ended profile questions."""

    student_id: str
    text_q1: str  # Strengths
    text_q2: str  # Career interests
    text_q3: str  # Extracurriculars + leadership

    def to_dict(self):
        """Serialize to a flat dict keyed by column name."""
        field_names = ('student_id', 'text_q1', 'text_q2', 'text_q3')
        return {name: getattr(self, name) for name in field_names}

    @staticmethod
    def get_questions() -> List[Dict[str, str]]:
        """Return the 3 textual questions"""
        prompts = [
            ("text_q1",
             "What are your key strengths and technical skills? (150-300 words)",
             "Describe your technical skills, soft skills, and what makes you stand out..."),
            ("text_q2",
             "What are your career interests and goals? (150-300 words)",
             "Describe your ideal career path, industries of interest, and long-term goals..."),
            ("text_q3",
             "Describe your extracurricular activities and leadership experiences. (150-300 words)",
             "Share your involvement in clubs, projects, leadership roles, and impact..."),
        ]
        return [
            {"id": qid, "text": text, "placeholder": hint}
            for qid, text, hint in prompts
        ]
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Flask==3.0.0
2
+ Flask-CORS==4.0.0
3
+ supabase==2.9.0
4
+ websockets>=15.0.1
5
+ sentence-transformers>=2.2.0
6
+ numpy>=1.24.0
7
+ pandas>=2.0.0
8
+ scikit-learn>=1.3.0
9
+ python-dotenv==1.0.0
10
+ gunicorn==21.2.0
11
+ torch>=2.0.0
12
+ transformers>=4.30.0
routes/domain.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain Evidence Routes
2
+
3
+ API endpoints for submitting and managing domain-specific evidence
4
+ """
5
+ from flask import Blueprint, request, jsonify
6
+ import logging
7
+ from database.db import get_db
8
+ from services.domain_plugins import DomainPluginFactory
9
+ from services.domain_plugins.tech_plugin import TechPlugin
10
+ from services.domain_plugins.business_plugin import BusinessPlugin
11
+ from services.domain_plugins.creative_plugin import CreativePlugin
12
+ from services.domain_plugins.research_plugin import ResearchPlugin
13
+
14
+ domain_bp = Blueprint('domain', __name__)
15
+ db = get_db()
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@domain_bp.route('/domain/available', methods=['GET'])
def list_available_domains():
    """List all available domain plugins with their detailed info."""
    try:
        available = DomainPluginFactory.list_available_domains()

        # Expand each domain id into its full info record, skipping any
        # domain the factory cannot describe (falsy info).
        domain_info = [
            info
            for info in (DomainPluginFactory.get_domain_info(d) for d in available)
            if info
        ]

        return jsonify({
            'success': True,
            'domains': domain_info
        }), 200

    except Exception as e:
        logger.error(f"Error listing domains: {e}")
        return jsonify({'error': str(e)}), 500
40
+
41
+
42
@domain_bp.route('/students/<student_id>/domain-evidence', methods=['POST'])
def submit_domain_evidence(student_id):
    """Submit domain-specific evidence for scoring.

    Validates the payload against the matching domain plugin, scores it,
    persists the evidence record, and caches the resulting score on the
    student row.  Returns 400 for invalid input, 404 for an unknown
    student, 201 on success.
    """
    # Initialized before the try so the error handler below can always
    # reference them — previously they could be unbound (NameError) if
    # parsing the request body itself raised.
    domain_type = None
    evidence_data = {}
    try:
        # `request.json` is None when no JSON body was sent; fall back to {}
        # so we reach the "invalid domain type" 400 instead of a 500.
        data = request.json or {}
        domain_type = data.get('domain_type')
        evidence_data = data.get('evidence_data', {})

        # Validate domain type
        if not DomainPluginFactory.is_domain_available(domain_type):
            return jsonify({
                'error': f'Invalid domain type: {domain_type}',
                'available_domains': DomainPluginFactory.list_available_domains()
            }), 400

        # Get plugin and let it validate the evidence payload it will score
        plugin = DomainPluginFactory.get_plugin(domain_type)
        is_valid, error_msg = plugin.validate_inputs(evidence_data)
        if not is_valid:
            return jsonify({'error': error_msg}), 400

        # Check if student exists
        student_check = db.table('analytics_students').select('student_id').eq('student_id', student_id).execute()
        if not student_check.data:
            return jsonify({'error': 'Student not found'}), 404

        # Score the evidence
        logger.info(f"Scoring {domain_type} evidence for student {student_id}")
        domain_score = plugin.score(evidence_data)

        # Store evidence and score
        evidence_record = {
            'student_id': student_id,
            'domain_type': domain_type,
            'evidence_data': evidence_data,
            'domain_score': domain_score.score,
            'domain_confidence': domain_score.confidence,
            'raw_features': domain_score.raw_features,
            'processing_status': 'completed'
        }
        db.table('analytics_domain_evidence').upsert(evidence_record).execute()

        # Update student's active domain and cached scores
        student_update = {
            'student_id': student_id,
            'active_domain': domain_type,
            'domain_score': domain_score.score,
            'domain_confidence': domain_score.confidence
        }
        db.table('analytics_students').upsert(student_update).execute()

        logger.info(f"Domain evidence submitted successfully: {domain_type} score = {domain_score.score:.3f}")

        return jsonify({
            'success': True,
            'domain_score': domain_score.to_dict(),
            'message': f'{domain_type.capitalize()} domain evidence processed successfully'
        }), 201

    except Exception as e:
        # logger.exception records the stack trace in the app log
        # (replaces traceback.print_exc() to stdout).
        logger.exception(f"Error submitting domain evidence: {e}")

        # Best-effort: record the failure in the evidence table.  Narrowed
        # from a bare `except:` so this secondary write can never swallow
        # KeyboardInterrupt/SystemExit, and the failure is now logged.
        try:
            db.table('analytics_domain_evidence').upsert({
                'student_id': student_id,
                'domain_type': domain_type,
                'evidence_data': evidence_data,
                'processing_status': 'failed',
                'error_message': str(e)
            }).execute()
        except Exception as store_err:
            logger.error(f"Could not store failed evidence record: {store_err}")

        return jsonify({'error': str(e)}), 500
124
+
125
+
126
@domain_bp.route('/students/<student_id>/domain-evidence', methods=['GET'])
def get_domain_evidence(student_id):
    """Get all domain evidence records for a student (200 even when empty)."""
    try:
        rows = db.table('analytics_domain_evidence').select('*').eq('student_id', student_id).execute()

        # An empty result is not an error here: respond 200 with an empty
        # list plus an explanatory message.
        payload = {'success': True, 'evidence': rows.data or []}
        if not rows.data:
            payload['message'] = 'No domain evidence found'
        return jsonify(payload), 200

    except Exception as e:
        logger.error(f"Error retrieving domain evidence: {e}")
        return jsonify({'error': str(e)}), 500
147
+
148
+
149
@domain_bp.route('/students/<student_id>/domain-evidence/<domain_type>', methods=['GET'])
def get_specific_domain_evidence(student_id, domain_type):
    """Get one domain's evidence record for a student; 404 when absent."""
    try:
        query = (
            db.table('analytics_domain_evidence')
            .select('*')
            .eq('student_id', student_id)
            .eq('domain_type', domain_type)
        )
        result = query.execute()

        if not result.data:
            return jsonify({
                'error': f'No {domain_type} evidence found for student {student_id}'
            }), 404

        # First row only — presumably one record per (student, domain);
        # confirm against the upsert in submit_domain_evidence.
        return jsonify({'success': True, 'evidence': result.data[0]}), 200

    except Exception as e:
        logger.error(f"Error retrieving domain evidence: {e}")
        return jsonify({'error': str(e)}), 500
172
+
173
+
174
@domain_bp.route('/students/<student_id>/domain-evidence/<domain_type>', methods=['DELETE'])
def delete_domain_evidence(student_id, domain_type):
    """Delete one domain's evidence and clear cached scores if it was active."""
    try:
        # Remove the evidence row itself.
        (db.table('analytics_domain_evidence')
            .delete()
            .eq('student_id', student_id)
            .eq('domain_type', domain_type)
            .execute())

        # If this domain was the student's active one, also clear the
        # cached domain score fields on the student row.
        profile = db.table('analytics_students').select('active_domain').eq('student_id', student_id).execute()
        was_active = profile.data and profile.data[0].get('active_domain') == domain_type
        if was_active:
            db.table('analytics_students').update({
                'active_domain': None,
                'domain_score': None,
                'domain_confidence': None
            }).eq('student_id', student_id).execute()

        return jsonify({
            'success': True,
            'message': f'{domain_type.capitalize()} evidence deleted'
        }), 200

    except Exception as e:
        logger.error(f"Error deleting domain evidence: {e}")
        return jsonify({'error': str(e)}), 500
routes/scoring.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Scoring routes"""
2
+ from flask import Blueprint, jsonify
3
+ from database.db import get_db
4
+ from services.universal_module import UniversalModule
5
+ from services.personality_module import PersonalityModule
6
+ from services.text_module import TextModule
7
+ from services.fusion import FusionEngine
8
+
9
+ scoring_bp = Blueprint('scoring', __name__)
10
+ db = get_db()
11
+
12
+ # Initialize modules
13
+ universal_module = UniversalModule()
14
+ personality_module = PersonalityModule()
15
+ text_module = TextModule()
16
+ fusion_engine = FusionEngine()
17
+
18
+
19
+ @scoring_bp.route('/score/<student_id>', methods=['GET'])
20
+ def get_student_score(student_id):
21
+ """
22
+ Compute and return full scoring packet for a student
23
+ """
24
+ try:
25
+ # 1. Fetch student data
26
+ student_result = db.table('analytics_students').select('*').eq('student_id', student_id).single().execute()
27
+ if not student_result.data:
28
+ return jsonify({'error': 'Student not found'}), 404
29
+
30
+ student_data = student_result.data
31
+
32
+ # 2. Fetch personality responses
33
+ personality_result = db.table('analytics_personality_responses').select('*').eq('student_id', student_id).maybe_single().execute()
34
+ personality_responses = {}
35
+ if personality_result.data:
36
+ personality_responses = {k: v for k, v in personality_result.data.items() if k.startswith('p_q')}
37
+
38
+ # 3. Fetch text responses
39
+ text_result = db.table('analytics_text_responses').select('*').eq('student_id', student_id).maybe_single().execute()
40
+ text_responses = {}
41
+ if text_result.data:
42
+ text_responses = {
43
+ 'text_q1': text_result.data.get('text_q1', ''),
44
+ 'text_q2': text_result.data.get('text_q2', ''),
45
+ 'text_q3': text_result.data.get('text_q3', '')
46
+ }
47
+
48
+ # 3.5. Fetch domain evidence (if exists)
49
+ domain_score = None
50
+ domain_confidence = None
51
+ domain_type = None
52
+ domain_features = {}
53
+
54
+ if student_data.get('active_domain'):
55
+ domain_type = student_data.get('active_domain')
56
+ domain_score = student_data.get('domain_score')
57
+ domain_confidence = student_data.get('domain_confidence')
58
+
59
+ # Fetch detailed domain evidence
60
+ domain_result = db.table('analytics_domain_evidence')\
61
+ .select('*')\
62
+ .eq('student_id', student_id)\
63
+ .eq('domain_type', domain_type)\
64
+ .maybe_single()\
65
+ .execute()
66
+
67
+ if domain_result.data:
68
+ domain_features = domain_result.data.get('raw_features', {})
69
+
70
+ # 4. Calculate universal score
71
+ universal_score, universal_confidence, universal_features = universal_module.score(student_data)
72
+ universal_explanations = universal_module.explain(universal_features)
73
+
74
+ # 5. Calculate personality score
75
+ personality_score, personality_confidence, personality_traits = personality_module.score(personality_responses)
76
+ personality_explanations = personality_module.explain(personality_traits)
77
+
78
+ # 6. Calculate text score
79
+ text_score, text_confidence, text_features = text_module.score(text_responses)
80
+ text_explanations = text_module.explain(text_features)
81
+
82
+ # 7. Fuse scores (with optional domain score)
83
+ final_score, breakdown = fusion_engine.fuse_scores(
84
+ universal_score, universal_confidence,
85
+ personality_score, personality_confidence,
86
+ text_score, text_confidence,
87
+ domain_score, domain_confidence
88
+ )
89
+
90
+ # 8. Get grade and percentile
91
+ grade = fusion_engine.get_grade(final_score)
92
+ percentile = fusion_engine.get_percentile(final_score)
93
+
94
+ # 9. Prepare response
95
+ response = {
96
+ 'student_id': student_id,
97
+ 'final_score': round(final_score, 4),
98
+ 'grade': grade,
99
+ 'percentile': percentile,
100
+ 'scores': breakdown,
101
+ 'explanations': {
102
+ 'universal': universal_explanations,
103
+ 'personality': personality_explanations,
104
+ 'text': text_explanations
105
+ },
106
+ 'detailed_features': {
107
+ 'universal': {k: round(v, 3) for k, v in universal_features.items()},
108
+ 'personality': {k: round(v, 3) for k, v in personality_traits.items()},
109
+ 'text': {k: round(v, 3) for k, v in text_features.items()}
110
+ },
111
+ 'data_completeness': {
112
+ 'universal': universal_confidence,
113
+ 'personality': personality_confidence,
114
+ 'text': text_confidence
115
+ }
116
+ }
117
+
118
+ # Add domain information if present
119
+ if domain_type:
120
+ response['domain_type'] = domain_type
121
+ response['detailed_features']['domain'] = {k: round(v, 3) for k, v in domain_features.items()}
122
+ response['data_completeness']['domain'] = domain_confidence
123
+ response['explanations']['domain'] = {
124
+ 'message': f'{domain_type.capitalize()} domain evidence provided',
125
+ 'features': domain_features
126
+ }
127
+ else:
128
+ response['domain_type'] = None
129
+ response['explanations']['domain'] = {
130
+ 'message': 'No domain-specific evidence submitted. Submit GitHub/portfolio/resume for enhanced scoring.'
131
+ }
132
+
133
+ return jsonify({
134
+ 'success': True,
135
+ 'data': response
136
+ }), 200
137
+
138
+ except Exception as e:
139
+ import traceback
140
+ traceback.print_exc()
141
+ return jsonify({'error': str(e)}), 500
142
+
143
+
144
@scoring_bp.route('/leaderboard', methods=['GET'])
def get_leaderboard():
    """
    Get top students by score (mock for MVP).

    Recomputes scores on the fly for up to 10 students; in production this
    would compute and cache scores.  Students whose scoring raises are
    skipped rather than failing the whole request.
    """
    try:
        # Fetch all students
        students = db.table('analytics_students').select('*').execute()

        leaderboard = []
        for student in students.data[:10]:  # Limit to 10 for MVP
            try:
                student_id = student['student_id']

                # Personality answers (p_q* Likert columns only)
                personality_result = db.table('analytics_personality_responses').select('*').eq('student_id', student_id).maybe_single().execute()
                personality_responses = {}
                if personality_result.data:
                    personality_responses = {k: v for k, v in personality_result.data.items() if k.startswith('p_q')}

                # Free-text answers
                text_result = db.table('analytics_text_responses').select('*').eq('student_id', student_id).maybe_single().execute()
                text_responses = {}
                if text_result.data:
                    text_responses = {
                        'text_q1': text_result.data.get('text_q1', ''),
                        'text_q2': text_result.data.get('text_q2', ''),
                        'text_q3': text_result.data.get('text_q3', '')
                    }

                # Calculate module scores and fuse (no domain inputs here)
                universal_score, universal_conf, _ = universal_module.score(student)
                personality_score, personality_conf, _ = personality_module.score(personality_responses)
                text_score, text_conf, _ = text_module.score(text_responses)

                final_score, _ = fusion_engine.fuse_scores(
                    universal_score, universal_conf,
                    personality_score, personality_conf,
                    text_score, text_conf
                )

                leaderboard.append({
                    'student_id': student_id,
                    'final_score': round(final_score, 3),
                    'grade': fusion_engine.get_grade(final_score)
                })
            except Exception:
                # Was a bare `except:` — narrowed so KeyboardInterrupt and
                # SystemExit are no longer swallowed; a bad row is skipped.
                continue

        # Sort by score, best first
        leaderboard.sort(key=lambda x: x['final_score'], reverse=True)

        return jsonify({
            'success': True,
            'data': leaderboard
        }), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500
+ return jsonify({'error': str(e)}), 500
routes/students.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Student management routes"""
2
+ from flask import Blueprint, request, jsonify
3
+ from database.db import get_db
4
+ from models.student import Student
5
+
6
+ students_bp = Blueprint('students', __name__)
7
+ db = get_db()
8
+
9
@students_bp.route('/students', methods=['POST'])
def create_student():
    """Create or update a student profile.

    Expects a JSON body with at least 'student_id' and 'cgpa'; all other
    Student fields are optional. The record is upserted into
    analytics_students, so resubmitting the same student_id updates the row.

    Returns:
        201 with the upserted row on success,
        400 if the body is missing/not JSON or required fields are absent,
        500 on unexpected errors.
    """
    try:
        # silent=True returns None instead of raising when the body is
        # absent or not valid JSON, so we can answer with a clean 400
        # rather than an opaque 500. (request.json raised a TypeError
        # downstream when the body was missing.)
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'Request body must be a JSON object'}), 400
        print(f"Received data: {data}")  # Debug log

        # Validate required fields, reporting exactly which ones are missing
        required = ['student_id', 'cgpa']
        missing = [k for k in required if k not in data]
        if missing:
            return jsonify({'error': f"Missing required fields: {', '.join(missing)}"}), 400

        # Create student record
        student = Student(
            student_id=data['student_id'],
            user_id=data.get('user_id'),
            cgpa=data['cgpa'],
            sgpa_sem1=data.get('sgpa_sem1'),
            sgpa_sem2=data.get('sgpa_sem2'),
            sgpa_sem3=data.get('sgpa_sem3'),
            sgpa_sem4=data.get('sgpa_sem4'),
            sgpa_sem5=data.get('sgpa_sem5'),
            sgpa_sem6=data.get('sgpa_sem6'),
            sgpa_sem7=data.get('sgpa_sem7'),
            sgpa_sem8=data.get('sgpa_sem8'),
            tenth_pct=data.get('tenth_pct'),
            twelfth_pct=data.get('twelfth_pct'),
            extracurricular_text=data.get('extracurricular_text'),
            certifications_text=data.get('certifications_text'),
            internship_text=data.get('internship_text')
        )

        print(f"Student object created: {student.to_dict()}")  # Debug log

        # Insert or update into database (upsert)
        result = db.table('analytics_students').upsert(student.to_dict()).execute()

        print(f"Database result: {result}")  # Debug log

        return jsonify({
            'success': True,
            'data': result.data[0] if result.data else None
        }), 201

    except Exception as e:
        print(f"Error in create_student: {str(e)}")  # Debug log
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
58
+
59
+
60
@students_bp.route('/students', methods=['GET'])
def list_students():
    """Return every row of analytics_students.

    200 with {'success': True, 'data': [...]} normally; 500 with the
    error message if the database call fails.
    """
    try:
        rows = db.table('analytics_students').select('*').execute()
        return jsonify({'success': True, 'data': rows.data}), 200
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
71
+
72
+
73
@students_bp.route('/students/<student_id>', methods=['GET'])
def get_student(student_id):
    """Get a specific student by student_id.

    Uses maybe_single() — matching the existing leaderboard code — so a
    missing row yields an empty result and a proper 404. The previous
    .single() call raised inside the client when no row matched, which
    surfaced as a 500 and made the 404 branch unreachable.

    Returns:
        200 with the student row, 404 if not found, 500 on errors.
    """
    try:
        result = db.table('analytics_students').select('*').eq('student_id', student_id).maybe_single().execute()

        # Some client versions return None (rather than an empty result)
        # for zero rows, so guard both shapes.
        if not result or not result.data:
            return jsonify({'error': 'Student not found'}), 404

        return jsonify({
            'success': True,
            'data': result.data
        }), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500
88
+
89
+
90
@students_bp.route('/personality/<student_id>', methods=['POST'])
def submit_personality(student_id):
    """Submit personality responses (p_q1..p_q20) for a student.

    Only the 20 known p_q* keys are read from the body; anything else
    is ignored. The row is upserted, so resubmission overwrites.

    Returns:
        201 with the upserted row, 400 for a missing/invalid JSON body,
        404 if the student does not exist, 500 on unexpected errors.
    """
    try:
        # silent=True -> None instead of raising on a missing/invalid body
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        # Validate student exists. maybe_single() yields an empty result
        # for a missing row instead of raising (which surfaced as 500),
        # so the 404 branch below is actually reachable.
        student = db.table('analytics_students').select('student_id').eq('student_id', student_id).maybe_single().execute()
        if not student or not student.data:
            return jsonify({'error': 'Student not found'}), 404

        # Prepare personality data: keep only known p_q1..p_q20 keys
        personality_data = {'student_id': student_id}
        for i in range(1, 21):
            key = f'p_q{i}'
            if key in data:
                personality_data[key] = data[key]

        # Insert or update
        result = db.table('analytics_personality_responses').upsert(personality_data).execute()

        return jsonify({
            'success': True,
            'data': result.data[0] if result.data else None
        }), 201

    except Exception as e:
        return jsonify({'error': str(e)}), 500
118
+
119
+
120
@students_bp.route('/text/<student_id>', methods=['POST'])
def submit_text(student_id):
    """Submit the three free-text responses for a student.

    Requires text_q1, text_q2 and text_q3 in the JSON body. The row is
    upserted, so resubmission overwrites previous answers.

    Returns:
        201 with the upserted row, 400 for a missing/invalid body or
        missing text fields, 404 if the student does not exist,
        500 on unexpected errors.
    """
    try:
        # silent=True -> None instead of raising on a missing/invalid body
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        # Validate student exists. maybe_single() yields an empty result
        # for a missing row instead of raising (which surfaced as 500),
        # so the 404 branch below is actually reachable.
        student = db.table('analytics_students').select('student_id').eq('student_id', student_id).maybe_single().execute()
        if not student or not student.data:
            return jsonify({'error': 'Student not found'}), 404

        # Validate required text fields, naming exactly which are missing
        required = ['text_q1', 'text_q2', 'text_q3']
        missing = [k for k in required if k not in data]
        if missing:
            return jsonify({'error': f"Missing required text fields: {', '.join(missing)}"}), 400

        # Prepare text data
        text_data = {
            'student_id': student_id,
            'text_q1': data['text_q1'],
            'text_q2': data['text_q2'],
            'text_q3': data['text_q3']
        }

        # Insert or update
        result = db.table('analytics_text_responses').upsert(text_data).execute()

        return jsonify({
            'success': True,
            'data': result.data[0] if result.data else None
        }), 201

    except Exception as e:
        return jsonify({'error': str(e)}), 500
154
+
155
+
156
@students_bp.route('/questions/personality', methods=['GET'])
def get_personality_questions():
    """Return the 20 personality questions used by the survey."""
    from models.personality_responses import PersonalityResponses
    payload = {'success': True, 'data': PersonalityResponses.get_questions()}
    return jsonify(payload), 200
164
+
165
+
166
@students_bp.route('/questions/text', methods=['GET'])
def get_text_questions():
    """Return the 3 free-text questions used by the survey."""
    from models.text_responses import TextResponses
    payload = {'success': True, 'data': TextResponses.get_questions()}
    return jsonify(payload), 200
services/README_text_v2.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Text Module V2 - Aspect-Based Scoring
2
+
3
+ ## Overview
4
+ Enhanced text analysis using prototype-based aspect extraction with `all-mpnet-base-v2` embeddings.
5
+
6
+ ## Changes from V1
7
+ - **Model**: Upgraded from `all-MiniLM-L6-v2` (384d) to `all-mpnet-base-v2` (768d)
8
+ - **Approach**: Moved from simple reference embeddings to aspect-based prototype scoring
9
+ - **Aspects**: 10 employability aspects (leadership, technical_skills, problem_solving, etc.)
10
+ - **Admin**: Runtime seed updates via REST API
11
+
12
+ ## Configuration
13
+
14
+ ### Model Selection
15
+ Set via environment variable or constructor:
16
+ ```bash
17
+ export ASPECT_MODEL_NAME=all-mpnet-base-v2 # default
18
+ # or
19
+ export ASPECT_MODEL_NAME=all-MiniLM-L6-v2 # fallback
20
+ ```
21
+
22
+ ```python
23
+ from services.text_module_v2 import TextModuleV2
24
+
25
+ # Default (all-mpnet-base-v2)
26
+ text_module = TextModuleV2()
27
+
28
+ # Override model
29
+ text_module = TextModuleV2(model_name='all-MiniLM-L6-v2')
30
+ ```
31
+
32
+ ### Aspect Seeds
33
+ Seeds loaded from `./aspect_seeds.json` (created by default). Edit this file to customize aspect definitions.
34
+
35
+ **Location**: `analytics/backend/aspect_seeds.json`
36
+
37
+ ### Centroids Cache
38
+ Pre-computed centroids saved to `./aspect_centroids.npz` for fast cold starts.
39
+
40
+ ## Usage
41
+
42
+ ### Basic Scoring
43
+ ```python
44
+ text_module = TextModuleV2()
45
+
46
+ text_responses = {
47
+ 'text_q1': "I developed ML pipelines using Python and scikit-learn...",
48
+ 'text_q2': "My career goal is to become a data scientist...",
49
+ 'text_q3': "I led a team of 5 students in a hackathon project..."
50
+ }
51
+
52
+ score, confidence, features = text_module.score(text_responses)
53
+
54
+ print(f"Score: {score:.2f}, Confidence: {confidence:.2f}")
55
+ print(f"Features: {features}")
56
+ ```
57
+
58
+ ### Get Current Seeds
59
+ ```python
60
+ seeds = text_module.get_aspect_seeds()
61
+ print(f"Loaded {len(seeds)} aspects")
62
+ ```
63
+
64
+ ## Admin API
65
+
66
+ ### Setup
67
+ ```python
68
+ from flask import Flask
69
+ from services.text_module_v2 import TextModuleV2, register_admin_seed_endpoint
70
+
71
+ app = Flask(__name__)
72
+ text_module = TextModuleV2()
73
+
74
+ # Register admin endpoints
75
+ register_admin_seed_endpoint(app, text_module)
76
+
77
+ app.run(port=5001)
78
+ ```
79
+
80
+ Set admin token:
81
+ ```bash
82
+ export ADMIN_SEED_TOKEN=your-secret-token
83
+ ```
84
+
85
+ ### Endpoints
86
+
87
+ #### GET /admin/aspect-seeds
88
+ Get current loaded seeds.
89
+
90
+ **Request**:
91
+ ```bash
92
+ curl -H "X-Admin-Token: your-secret-token" \
93
+ http://localhost:5001/admin/aspect-seeds
94
+ ```
95
+
96
+ **Response**:
97
+ ```json
98
+ {
99
+ "success": true,
100
+ "seeds": {
101
+ "leadership": ["led a team", "managed project", ...],
102
+ "technical_skills": [...]
103
+ },
104
+ "num_aspects": 10
105
+ }
106
+ ```
107
+
108
+ #### POST /admin/aspect-seeds
109
+ Update aspect seeds (recomputes centroids).
110
+
111
+ **Request**:
112
+ ```bash
113
+ curl -X POST \
114
+ -H "X-Admin-Token: your-secret-token" \
115
+ -H "Content-Type: application/json" \
116
+ -d '{
117
+ "seeds": {
118
+ "leadership": [
119
+ "led a team",
120
+ "managed stakeholders",
121
+ "organized events"
122
+ ],
123
+ "technical_skills": [
124
+ "developed web API",
125
+ "built ML models"
126
+ ]
127
+ },
128
+ "persist": true
129
+ }' \
130
+ http://localhost:5001/admin/aspect-seeds
131
+ ```
132
+
133
+ **Response**:
134
+ ```json
135
+ {
136
+ "success": true,
137
+ "message": "Aspect seeds updated successfully",
138
+ "stats": {
139
+ "num_aspects": 2,
140
+ "avg_seed_count": 2.5,
141
+ "timestamp": "2025-12-09T10:30:00Z"
142
+ }
143
+ }
144
+ ```
145
+
146
+ ## Advanced: Seed Expansion
147
+
148
+ Suggest new seed phrases from a corpus:
149
+
150
+ ```python
151
+ corpus = [
152
+ "I led the product development team and managed stakeholders",
153
+ "Implemented CI/CD pipelines for automated testing",
154
+ # ... more texts
155
+ ]
156
+
157
+ suggestions = text_module.suggest_seed_expansions(
158
+ corpus_texts=corpus,
159
+ aspect_key='leadership',
160
+ top_n=20
161
+ )
162
+
163
+ print("Suggested seeds:", suggestions)
164
+ ```
165
+
166
+ ## Aspect → Question Mapping
167
+
168
+ ```python
169
+ from services.text_module_v2 import get_relevant_aspects_for_question
170
+
171
+ # Q1: Strengths & skills
172
+ aspects_q1 = get_relevant_aspects_for_question('text_q1')
173
+ # ['technical_skills', 'problem_solving', 'learning_agility', 'initiative', 'communication']
174
+
175
+ # Q2: Career interests
176
+ aspects_q2 = get_relevant_aspects_for_question('text_q2')
177
+ # ['career_alignment', 'learning_agility', 'initiative', 'communication']
178
+
179
+ # Q3: Extracurriculars & leadership
180
+ aspects_q3 = get_relevant_aspects_for_question('text_q3')
181
+ # ['leadership', 'teamwork', 'project_execution', 'internships_experience', 'communication']
182
+ ```
183
+
184
+ ## Files
185
+
186
+ | File | Purpose |
187
+ |------|---------|
188
+ | `services/text_module_v2.py` | Main module implementation |
189
+ | `aspect_seeds.json` | Aspect seed definitions (editable) |
190
+ | `aspect_centroids.npz` | Cached centroids (auto-generated) |
191
+
192
+ ## Performance
193
+
194
+ - **Model Load**: ~3s (first time)
195
+ - **Centroid Build**: ~1s for 10 aspects with 20 seeds each
196
+ - **Text Scoring**: ~200-500ms per 3-question set (CPU)
197
+
198
+ ## Logging
199
+
200
+ Module logs to Python's `logging` system:
201
+ ```python
202
+ import logging
203
+ logging.basicConfig(level=logging.INFO)
204
+ ```
205
+
206
+ Key events logged:
207
+ - Model loading
208
+ - Seed updates (with masked token)
209
+ - Centroid recomputation
210
+ - File I/O operations
services/batch_aggregation.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch Aggregation Service - College-level macro analysis
3
+ Aggregates individual student scores into batch-level reports
4
+ """
5
+ import logging
6
+ import numpy as np
7
+ from typing import Dict, List, Any, Optional
8
+ from dataclasses import dataclass, asdict
9
+ from datetime import datetime
10
+ from collections import Counter
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@dataclass
class AggregateMetrics:
    """Batch-level aggregate metrics"""
    total_students: int  # number of student packets aggregated
    avg_employability_score: float  # mean final score (0-1 scale)
    median_score: float  # median final score (0-1 scale)
    std_dev: float  # standard deviation of final scores
    placement_ready_pct: float  # % with score >= 0.6
    skill_diversity_index: int  # Unique skills count
    avg_cgpa: float  # mean CGPA, denormalized back to a 0-10 scale
    avg_internship_months: float  # mean internship exposure in months
26
+
27
+
28
@dataclass
class AspectDistribution:
    """Distribution stats for an aspect"""
    aspect: str  # aspect key, e.g. 'leadership'
    avg: float
    std: float
    # NOTE: 'min'/'max' shadow the builtins only inside this class body
    min: float
    max: float
    top_10_pct_avg: float  # Avg of top 10%
    bottom_10_pct_avg: float  # Avg of bottom 10%
38
+
39
+
40
@dataclass
class DomainBreakdown:
    """Domain-wise student distribution"""
    domain_id: str  # raw domain key, e.g. 'machine_learning'
    display_name: str  # human-readable name derived from domain_id
    count: int  # students in this domain
    percentage: float  # share of the batch, 0-100
    avg_score: float  # mean final score within the domain
48
+
49
+
50
@dataclass
class SkillGap:
    """Skill gap analysis result"""
    skill: str  # skill key from the industry demand mapping
    demand_score: float  # relative industry demand weight, 0-1
    students_with_skill: int  # count of students reporting the skill
    students_pct: float  # coverage as a percentage of the batch
    gap_severity: str  # 'critical', 'moderate', 'low'
58
+
59
+
60
@dataclass
class BatchRecommendation:
    """Recommendation for batch improvement"""
    category: str  # 'curriculum', 'training', 'industry'
    priority: str  # 'high', 'medium', 'low'
    recommendation: str  # actionable suggestion text
    impact: str  # one-line justification / expected impact
67
+
68
+
69
class BatchAggregationService:
    """
    Aggregates individual student data into college-level macro reports
    """

    # Thresholds
    PLACEMENT_READY_THRESHOLD = 0.60  # final score needed to count as placement-ready
    CRITICAL_GAP_THRESHOLD = 0.30  # < 30% students have skill
    MODERATE_GAP_THRESHOLD = 0.50  # 30-50% coverage -> 'moderate' gap

    def __init__(self):
        # Industry demand mapping (can be loaded from external source)
        # Values are relative demand weights in [0, 1].
        self.industry_demands = {
            'python': 0.90,
            'sql': 0.85,
            'java': 0.80,
            'javascript': 0.75,
            'machine_learning': 0.70,
            'cloud': 0.85,
            'devops': 0.75,
            'data_analysis': 0.70,
            'system_design': 0.65,
            'communication': 0.80,
            'leadership': 0.60,
            'teamwork': 0.75
        }

    def aggregate_batch(self,
                        students: List[Dict[str, Any]],
                        college_name: str = "Unknown College",
                        batch_year: Optional[int] = None) -> Dict[str, Any]:
        """
        Generate comprehensive batch report from student data

        Args:
            students: List of student score packets (from scoring endpoint)
            college_name: Name of the college
            batch_year: Graduation year (defaults to the current year)

        Returns:
            Complete macro analysis report
        """
        if not students:
            return self._empty_report(college_name, batch_year)

        batch_year = batch_year or datetime.now().year

        # Extract scores and features
        scores = []
        cgpas = []
        internship_months = []
        all_skills = []
        domain_counts = Counter()
        aspect_scores = {
            'technical_skills': [],
            'problem_solving': [],
            'leadership': [],
            'communication': [],
            'teamwork': [],
            'learning_agility': []
        }

        for student in students:
            # Final score
            final_score = student.get('final_score', 0)
            scores.append(final_score)

            # Features
            # NOTE(review): assumes each packet carries a
            # 'detailed_features' dict with 'universal'/'text' sub-dicts
            # holding normalized 0-1 values — confirm against the scoring
            # endpoint's output shape.
            features = student.get('detailed_features', {})
            universal = features.get('universal', {})
            text = features.get('text', {})

            cgpas.append(universal.get('cgpa_norm', 0) * 10)  # Denormalize
            internship_months.append(universal.get('internship_exposure', 0) * 12)

            # Domain
            domain = student.get('domain_type') or student.get('detected_domain', 'general')
            domain_counts[domain] += 1

            # Aspect scores: prefer text-module values, fall back to universal
            for aspect in aspect_scores:
                if aspect in text:
                    aspect_scores[aspect].append(text[aspect])
                elif aspect in universal:
                    aspect_scores[aspect].append(universal[aspect])

            # Skills (from raw student data if available)
            if 'skills' in student:
                skills = student['skills']
                if isinstance(skills, str):
                    # Comma-separated string -> lowercase list
                    skills = [s.strip().lower() for s in skills.split(',')]
                all_skills.extend(skills)

        # Compute aggregates
        aggregate = self._compute_aggregate_metrics(
            scores, cgpas, internship_months, all_skills
        )

        # Aspect distributions
        aspects = self._compute_aspect_distributions(aspect_scores)

        # Domain breakdown
        domains = self._compute_domain_breakdown(domain_counts, students)

        # Skill gaps
        skill_gaps = self._analyze_skill_gaps(all_skills, len(students))

        # Recommendations
        recommendations = self._generate_recommendations(
            aggregate, aspects, skill_gaps
        )

        # Build report
        report = {
            'report_id': f"BATCH_{batch_year}_{college_name[:3].upper()}",
            'college_name': college_name,
            'batch_year': batch_year,
            # NOTE(review): datetime.utcnow() is naive and deprecated in
            # Python 3.12 — consider datetime.now(timezone.utc); keeping
            # as-is preserves the exact '...Z' output format.
            'generated_at': datetime.utcnow().isoformat() + 'Z',
            'total_students': len(students),

            'aggregate_metrics': asdict(aggregate),

            'score_distribution': self._compute_score_distribution(scores),

            'aspect_analysis': [asdict(a) for a in aspects],

            'domain_breakdown': [asdict(d) for d in domains],

            'skill_gap_analysis': [asdict(g) for g in skill_gaps],

            'recommendations': [asdict(r) for r in recommendations],

            'percentile_bands': self._compute_percentile_bands(scores)
        }

        return report

    def _compute_aggregate_metrics(self, scores, cgpas, internship_months,
                                   skills) -> AggregateMetrics:
        """Compute high-level aggregate metrics"""
        scores_arr = np.array(scores)

        placement_ready = sum(1 for s in scores if s >= self.PLACEMENT_READY_THRESHOLD)
        placement_pct = (placement_ready / len(scores)) * 100 if scores else 0

        return AggregateMetrics(
            total_students=len(scores),
            avg_employability_score=round(float(np.mean(scores_arr)), 3),
            median_score=round(float(np.median(scores_arr)), 3),
            std_dev=round(float(np.std(scores_arr)), 3),
            placement_ready_pct=round(placement_pct, 1),
            skill_diversity_index=len(set(skills)),
            avg_cgpa=round(float(np.mean(cgpas)) if cgpas else 0, 2),
            avg_internship_months=round(float(np.mean(internship_months)) if internship_months else 0, 1)
        )

    def _compute_aspect_distributions(self, aspect_scores) -> List[AspectDistribution]:
        """Compute distribution stats for each aspect"""
        distributions = []

        for aspect, scores in aspect_scores.items():
            if not scores:
                continue

            arr = np.array(scores)
            # Both indices are max(1, 10% of n): with fewer than 10 samples
            # the "top/bottom 10%" degenerates to the single best/worst value.
            top_10_idx = int(len(arr) * 0.1) or 1
            bottom_10_idx = int(len(arr) * 0.1) or 1

            sorted_arr = np.sort(arr)

            distributions.append(AspectDistribution(
                aspect=aspect,
                avg=round(float(np.mean(arr)), 3),
                std=round(float(np.std(arr)), 3),
                min=round(float(np.min(arr)), 3),
                max=round(float(np.max(arr)), 3),
                top_10_pct_avg=round(float(np.mean(sorted_arr[-top_10_idx:])), 3),
                bottom_10_pct_avg=round(float(np.mean(sorted_arr[:bottom_10_idx])), 3)
            ))

        return distributions

    def _compute_domain_breakdown(self, domain_counts, students) -> List[DomainBreakdown]:
        """Compute domain-wise breakdown"""
        breakdowns = []
        total = len(students)

        for domain, count in domain_counts.most_common():
            # Calculate avg score for this domain
            # (re-resolves each student's domain with the same fallback
            # chain used when counting, so the two stay consistent)
            domain_scores = [
                s.get('final_score', 0) for s in students
                if (s.get('domain_type') or s.get('detected_domain', 'general')) == domain
            ]
            avg_score = np.mean(domain_scores) if domain_scores else 0

            breakdowns.append(DomainBreakdown(
                domain_id=domain,
                display_name=domain.replace('_', ' ').title(),
                count=count,
                percentage=round((count / total) * 100, 1),
                avg_score=round(float(avg_score), 3)
            ))

        return breakdowns

    def _analyze_skill_gaps(self, all_skills, total_students) -> List[SkillGap]:
        """Analyze skill gaps against industry demand"""
        skill_counts = Counter(all_skills)
        gaps = []

        for skill, demand in self.industry_demands.items():
            count = skill_counts.get(skill, 0)
            pct = (count / total_students) * 100 if total_students else 0

            # Determine severity (thresholds are fractions; pct is 0-100)
            if pct < self.CRITICAL_GAP_THRESHOLD * 100:
                severity = 'critical'
            elif pct < self.MODERATE_GAP_THRESHOLD * 100:
                severity = 'moderate'
            else:
                severity = 'low'

            gaps.append(SkillGap(
                skill=skill,
                demand_score=demand,
                students_with_skill=count,
                students_pct=round(pct, 1),
                gap_severity=severity
            ))

        # Sort by demand * (1 - coverage): high-demand, low-coverage first
        gaps.sort(key=lambda g: g.demand_score * (1 - g.students_pct/100), reverse=True)

        return gaps[:10]  # Top 10 gaps

    def _generate_recommendations(self, aggregate, aspects,
                                  skill_gaps) -> List[BatchRecommendation]:
        """Generate actionable recommendations"""
        recommendations = []

        # Critical skill gaps (at most 3 curriculum items)
        critical_gaps = [g for g in skill_gaps if g.gap_severity == 'critical']
        for gap in critical_gaps[:3]:
            recommendations.append(BatchRecommendation(
                category='curriculum',
                priority='high',
                recommendation=f"Add {gap.skill.replace('_', ' ').title()} training to curriculum",
                impact=f"Only {gap.students_pct}% students have this in-demand skill"
            ))

        # Low placement readiness
        if aggregate.placement_ready_pct < 60:
            recommendations.append(BatchRecommendation(
                category='training',
                priority='high',
                recommendation="Implement intensive placement preparation program",
                impact=f"Only {aggregate.placement_ready_pct}% students are placement-ready"
            ))

        # Low internship exposure
        if aggregate.avg_internship_months < 3:
            recommendations.append(BatchRecommendation(
                category='industry',
                priority='medium',
                recommendation="Establish mandatory internship partnerships with industry",
                impact=f"Average internship exposure is only {aggregate.avg_internship_months} months"
            ))

        # Weak aspects (batch average below 0.5)
        for aspect in aspects:
            if aspect.avg < 0.5:
                recommendations.append(BatchRecommendation(
                    category='training',
                    priority='medium',
                    recommendation=f"Conduct workshops on {aspect.aspect.replace('_', ' ').title()}",
                    impact=f"Average score is {aspect.avg:.0%}, below acceptable threshold"
                ))

        return recommendations[:8]  # Limit to 8 recommendations

    def _compute_score_distribution(self, scores) -> Dict[str, int]:
        """Compute score distribution by grade bands"""
        distribution = {
            'A+ (90-100%)': 0,
            'A (80-90%)': 0,
            'B+ (70-80%)': 0,
            'B (60-70%)': 0,
            'C (50-60%)': 0,
            'D (<50%)': 0
        }

        for score in scores:
            pct = score * 100  # scores are 0-1; bands are in percent
            if pct >= 90:
                distribution['A+ (90-100%)'] += 1
            elif pct >= 80:
                distribution['A (80-90%)'] += 1
            elif pct >= 70:
                distribution['B+ (70-80%)'] += 1
            elif pct >= 60:
                distribution['B (60-70%)'] += 1
            elif pct >= 50:
                distribution['C (50-60%)'] += 1
            else:
                distribution['D (<50%)'] += 1

        return distribution

    def _compute_percentile_bands(self, scores) -> Dict[str, float]:
        """Compute percentile thresholds"""
        if not scores:
            return {}

        arr = np.array(scores)
        return {
            'p10': round(float(np.percentile(arr, 10)), 3),
            'p25': round(float(np.percentile(arr, 25)), 3),
            'p50': round(float(np.percentile(arr, 50)), 3),
            'p75': round(float(np.percentile(arr, 75)), 3),
            'p90': round(float(np.percentile(arr, 90)), 3)
        }

    def _empty_report(self, college_name: str, batch_year: int) -> Dict[str, Any]:
        """Generate empty report for no data"""
        return {
            'report_id': f"BATCH_{batch_year or 'UNKNOWN'}_{college_name[:3].upper()}",
            'college_name': college_name,
            'batch_year': batch_year,
            'generated_at': datetime.utcnow().isoformat() + 'Z',
            'total_students': 0,
            'error': 'No student data provided',
            'aggregate_metrics': None,
            'recommendations': []
        }
403
+
404
+
405
+ # Singleton
406
+ _batch_service: Optional[BatchAggregationService] = None
407
+
408
+
409
def get_batch_aggregation_service() -> BatchAggregationService:
    """Return the process-wide BatchAggregationService, creating it lazily.

    The instance is cached in the module-level ``_batch_service`` global,
    so repeated calls always hand back the same object.
    """
    global _batch_service
    if _batch_service is None:
        _batch_service = BatchAggregationService()
    return _batch_service
services/domain_knowledge_base.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Knowledge Base - Dynamic domain-specific aspect prototypes and skill mapping
3
+ """
4
+ import os
5
+ import json
6
+ import logging
7
+ from typing import Dict, List, Optional, Tuple
8
+ from pathlib import Path
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class DomainConfig:
    """Configuration for a single domain, built from a parsed JSON dict."""

    def __init__(self, config_data: Dict):
        """Populate attributes from *config_data*, applying safe defaults."""
        read = config_data.get
        self.domain_id = read('domain_id', 'unknown')
        self.display_name = read('display_name', 'Unknown Domain')
        self.description = read('description', '')
        self.core_skills = read('core_skills', [])
        self.aspect_prototypes = read('aspect_prototypes', {})
        self.industry_benchmarks = read('industry_benchmarks', {})
        self.skill_gaps_mapping = read('skill_gaps_mapping', {})
        self.detection_keywords = read('detection_keywords', [])

    def get_aspect_seeds(self, aspect: str) -> List[str]:
        """Seed phrases for one aspect; empty list when the aspect is unknown."""
        return self.aspect_prototypes.get(aspect, [])

    def get_all_aspect_seeds(self) -> Dict[str, List[str]]:
        """Shallow copy of the full aspect -> seed-phrases mapping."""
        return dict(self.aspect_prototypes)

    def get_skill_gap_info(self, skill: str) -> Optional[Dict]:
        """Gap info (courses, certifications, demand) for *skill*, or None."""
        return self.skill_gaps_mapping.get(skill)

    def get_benchmark(self, key: str, default=None):
        """Industry benchmark value for *key*, falling back to *default*."""
        return self.industry_benchmarks.get(key, default)
41
+
42
+
43
class DomainKnowledgeBase:
    """
    Domain Knowledge Base - loads and manages domain configurations
    Provides domain-specific aspect prototypes for the Fidelity Criteria Transformer
    """

    def __init__(self, domains_dir: Optional[str] = None):
        """
        Initialize DKB with domain configs from directory

        Args:
            domains_dir: Path to directory containing domain JSON files
                         Defaults to ./domains/ relative to this file
        """
        if domains_dir is None:
            domains_dir = os.path.join(os.path.dirname(__file__), 'domains')

        self.domains_dir = Path(domains_dir)
        self.domains: Dict[str, DomainConfig] = {}
        self._keyword_index: Dict[str, str] = {}  # keyword -> domain_id

        self._load_all_domains()
        self._build_keyword_index()

        logger.info(f"DomainKnowledgeBase initialized with {len(self.domains)} domains")

    def _load_all_domains(self):
        """Load all domain configs from directory.

        A bad file is logged and skipped so one malformed JSON cannot
        prevent the remaining domains from loading.
        """
        if not self.domains_dir.exists():
            logger.warning(f"Domains directory not found: {self.domains_dir}")
            return

        for json_file in self.domains_dir.glob('*.json'):
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    config_data = json.load(f)

                domain_config = DomainConfig(config_data)
                self.domains[domain_config.domain_id] = domain_config
                logger.info(f"Loaded domain config: {domain_config.display_name}")

            except Exception as e:
                logger.error(f"Failed to load domain config {json_file}: {e}")

    def _build_keyword_index(self):
        """Build keyword -> domain mapping for detection.

        Keys are lowercased; a keyword claimed by several domains keeps
        only the last one loaded (load order is filesystem glob order).
        """
        for domain_id, config in self.domains.items():
            for keyword in config.detection_keywords:
                self._keyword_index[keyword.lower()] = domain_id

    def get_domain(self, domain_id: str) -> Optional[DomainConfig]:
        """Get domain config by ID"""
        return self.domains.get(domain_id)

    def list_domains(self) -> List[str]:
        """List all available domain IDs"""
        return list(self.domains.keys())

    def detect_domain(self, text: str, skills: Optional[List[str]] = None) -> Tuple[str, float]:
        """
        Detect most likely domain from text and/or skills

        Scoring: +0.1 per detection keyword found in the text and +0.15
        per matching core skill, capped at 1.0 per domain.

        Args:
            text: Text content (career goals, descriptions, etc.)
            skills: List of skill keywords

        Returns:
            (domain_id, confidence) tuple; ('general', confidence) when
            nothing scores at least 0.2
        """
        if not text and not skills:
            return ('general', 0.0)

        text_lower = (text or '').lower()
        skills_lower = [s.lower() for s in (skills or [])]

        domain_scores = {}

        for domain_id, config in self.domains.items():
            score = 0.0

            # Keyword matching from text (substring match, case-insensitive)
            for keyword in config.detection_keywords:
                if keyword.lower() in text_lower:
                    score += 0.1

            # Skill matching (exact match against lowercased core skills)
            core_skills_lower = [s.lower() for s in config.core_skills]
            skill_matches = sum(1 for s in skills_lower if s in core_skills_lower)
            score += skill_matches * 0.15

            domain_scores[domain_id] = min(score, 1.0)

        if not domain_scores:
            return ('general', 0.0)

        # Return domain with highest score
        best_domain = max(domain_scores, key=domain_scores.get)
        confidence = domain_scores[best_domain]

        # Minimum confidence threshold
        if confidence < 0.2:
            return ('general', confidence)

        return (best_domain, confidence)

    def get_aspect_prototypes_for_domain(self, domain_id: str) -> Dict[str, List[str]]:
        """Get all aspect prototypes for a domain (empty dict if unknown)"""
        config = self.domains.get(domain_id)
        if config:
            return config.get_all_aspect_seeds()
        return {}

    def get_merged_prototypes(self, detected_domain: str,
                              base_aspects: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """
        Merge domain-specific prototypes with base aspects
        Domain-specific seeds are added to base seeds

        Args:
            detected_domain: Domain ID from detection
            base_aspects: Base aspect seeds (from TextModuleV2 defaults)

        Returns:
            Merged aspect seeds dictionary (inputs are not mutated)
        """
        merged = {k: list(v) for k, v in base_aspects.items()}  # Deep copy

        domain_config = self.domains.get(detected_domain)
        if not domain_config:
            return merged

        # Merge domain-specific prototypes
        for aspect, seeds in domain_config.aspect_prototypes.items():
            if aspect in merged:
                # Prepend domain-specific seeds (higher priority)
                merged[aspect] = seeds + merged[aspect]
            else:
                merged[aspect] = seeds

        return merged

    def analyze_skill_gaps(self, student_skills: List[str],
                           domain_id: str) -> List[Dict]:
        """
        Analyze skill gaps for a student in a given domain

        Args:
            student_skills: List of skills the student has
            domain_id: Target domain

        Returns:
            List of skill gap objects with recommendations, sorted by
            descending demand score (empty if the domain is unknown)
        """
        config = self.domains.get(domain_id)
        if not config:
            return []

        student_skills_lower = [s.lower() for s in student_skills]
        gaps = []

        for skill, gap_info in config.skill_gaps_mapping.items():
            skill_lower = skill.lower()

            # Check if student has this skill
            # (bidirectional substring match so 'aws' matches 'aws cloud')
            has_skill = any(skill_lower in s or s in skill_lower
                            for s in student_skills_lower)

            if not has_skill:
                gaps.append({
                    'skill': skill,
                    'demand_score': gap_info.get('demand_score', 0.5),
                    'recommended_courses': gap_info.get('courses', []),
                    'certifications': gap_info.get('certifications', []),
                    'priority': 'high' if gap_info.get('demand_score', 0) > 0.7 else 'medium'
                })

        # Sort by demand score
        gaps.sort(key=lambda x: x['demand_score'], reverse=True)
        return gaps

    def get_domain_summary(self, domain_id: str) -> Optional[Dict]:
        """Get summary of a domain for reporting (None if unknown)"""
        config = self.domains.get(domain_id)
        if not config:
            return None

        return {
            'domain_id': config.domain_id,
            'display_name': config.display_name,
            'description': config.description,
            'core_skills_count': len(config.core_skills),
            'aspects_count': len(config.aspect_prototypes),
            'benchmarks': config.industry_benchmarks
        }
237
+
238
+
239
+ # Singleton instance
240
+ _dkb_instance: Optional[DomainKnowledgeBase] = None
241
+
242
+
243
def get_domain_knowledge_base(domains_dir: str = None) -> DomainKnowledgeBase:
    """Return the process-wide DomainKnowledgeBase, creating it on first use.

    The instance is cached in the module-level ``_dkb_instance`` global;
    ``domains_dir`` is only honored on the very first call, since later
    calls reuse the cached instance.
    """
    global _dkb_instance
    if _dkb_instance is None:
        _dkb_instance = DomainKnowledgeBase(domains_dir)
    return _dkb_instance
services/domain_plugins/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain-Specific Plugins Module
2
+
3
+ Pluggable architecture for domain-specific scoring (Tech, Business, Creative, Research).
4
+ Each plugin returns domain_score, domain_confidence, and raw features.
5
+ """
6
+
7
+ from .base_plugin import BaseDomainPlugin
8
+ from .plugin_factory import DomainPluginFactory
9
+
10
+ __all__ = ['BaseDomainPlugin', 'DomainPluginFactory']
services/domain_plugins/base_plugin.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Base Domain Plugin Interface"""
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Tuple, List, Optional
4
+ from dataclasses import dataclass
5
+
6
@dataclass
class DomainScore:
    """Standardized domain scoring output."""
    domain_type: str           # e.g. 'tech', 'business'
    score: float               # normalized 0-1
    confidence: float          # normalized 0-1
    raw_features: Dict         # raw feature values for explainability
    processing_time_ms: float  # wall-clock scoring time

    def to_dict(self):
        """Serialize for API responses, rounding numeric fields."""
        payload = dict(
            domain_type=self.domain_type,
            score=round(self.score, 3),
            confidence=round(self.confidence, 3),
            raw_features=self.raw_features,
            processing_time_ms=round(self.processing_time_ms, 2),
        )
        return payload
23
+
24
+
25
class BaseDomainPlugin(ABC):
    """Abstract base class for all domain plugins.

    Subclasses declare a domain identifier, feature weights, and their
    required/optional input fields, and implement ``score()``. Input
    validation, explanation, and confidence estimation are shared here.
    """

    def __init__(self):
        # Cache subclass-provided metadata once at construction time.
        self.domain_type = self._get_domain_type()
        self.feature_weights = self._get_feature_weights()

    @abstractmethod
    def _get_domain_type(self) -> str:
        """Return domain identifier (e.g., 'tech', 'business')"""
        pass

    @abstractmethod
    def _get_feature_weights(self) -> Dict[str, float]:
        """Return feature name to weight mapping"""
        pass

    @abstractmethod
    def get_required_fields(self) -> List[str]:
        """Return list of required input fields for this domain"""
        pass

    @abstractmethod
    def get_optional_fields(self) -> List[str]:
        """Return list of optional input fields"""
        pass

    def validate_inputs(self, evidence_data: Dict) -> Tuple[bool, Optional[str]]:
        """
        Validate input data completeness.

        A required field counts as missing when absent OR falsy
        (empty string/list/0).

        Returns: (is_valid, error_message); error_message is None on success.
        """
        required = self.get_required_fields()
        missing = [f for f in required if not evidence_data.get(f)]

        if missing:
            return False, f"Missing required fields: {', '.join(missing)}"

        return True, None

    @abstractmethod
    def score(self, evidence_data: Dict) -> "DomainScore":
        """
        Main scoring method - must be implemented by each plugin

        Args:
            evidence_data: Dictionary containing domain-specific inputs

        Returns:
            DomainScore object with score, confidence, and features
        """
        pass

    def explain(self, features: Dict) -> Dict:
        """Generate human-readable explanation of scoring.

        Reports up to the top 3 features by value, keeping only those
        whose value exceeds 0.3. 'recommendations' is left empty here
        for subclasses to fill.
        """
        explanations = {
            'top_features': [],
            'recommendations': []
        }

        # Sort features by value, highest first.
        sorted_features = sorted(features.items(), key=lambda x: x[1], reverse=True)

        for feat, val in sorted_features[:3]:
            if val > 0.3:
                explanations['top_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'weight': self.feature_weights.get(feat, 0)
                })

        return explanations

    def calculate_confidence(self, evidence_data: Dict) -> float:
        """
        Calculate confidence based on data completeness.

        Required fields contribute up to 0.7 and optional fields up to
        0.3; a plugin declaring no required (or no optional) fields is
        granted that share in full.

        Returns: 0-1 confidence score
        """
        required_fields = self.get_required_fields()
        optional_fields = self.get_optional_fields()
        # (Removed an unused `total_fields` computation present before.)

        filled_required = sum(1 for f in required_fields if evidence_data.get(f))
        filled_optional = sum(1 for f in optional_fields if evidence_data.get(f))

        # Base confidence from required fields (70%)
        required_confidence = (filled_required / len(required_fields)) * 0.7 if required_fields else 0.7

        # Bonus from optional fields (30%)
        optional_confidence = (filled_optional / len(optional_fields)) * 0.3 if optional_fields else 0.3

        return min(required_confidence + optional_confidence, 1.0)
services/domain_plugins/business_plugin.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Business/Finance Domain Plugin
2
+
3
+ Scores business competency based on:
4
+ - Resume content (ATS-style keyword matching)
5
+ - Case study submission analysis
6
+ - Excel/analytical test scores
7
+ - Internship experience in business domains
8
+ """
9
+ import re
10
+ import time
11
+ import logging
12
+ from typing import Dict, List
13
+ from .base_plugin import BaseDomainPlugin, DomainScore
14
+ from .plugin_factory import register_plugin
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_plugin('business')
class BusinessPlugin(BaseDomainPlugin):
    """Business/Finance domain scoring plugin.

    Combines ATS-style resume keyword matching, internship relevance,
    case-study analysis, and an Excel test score into a weighted 0-1
    domain score.
    """

    def __init__(self):
        super().__init__()
        # Business-relevant keywords, grouped by sub-domain category.
        self.business_keywords = {
            'consulting': ['consulting', 'consultant', 'advisory', 'strategy', 'mckinsey', 'bain', 'bcg'],
            'finance': ['finance', 'banking', 'investment', 'equity', 'portfolio', 'analyst', 'goldman', 'morgan'],
            'analytics': ['data analysis', 'business intelligence', 'tableau', 'power bi', 'sql', 'excel'],
            'management': ['project management', 'product management', 'stakeholder', 'agile', 'scrum'],
            'sales': ['sales', 'business development', 'client acquisition', 'revenue', 'crm'],
            'operations': ['operations', 'supply chain', 'logistics', 'process improvement', 'lean', 'six sigma']
        }

    def _get_domain_type(self) -> str:
        return 'business'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Weights sum to 1.0; resume-derived evidence dominates.
        return {
            'resume_keyword_score': 0.30,
            'internship_relevance': 0.25,
            'case_study_score': 0.20,
            'excel_test_score': 0.15,
            'business_depth': 0.10
        }

    def get_required_fields(self) -> List[str]:
        return ['resume_text']  # Resume text (extracted from PDF)

    def get_optional_fields(self) -> List[str]:
        return ['case_study_text', 'excel_test_score', 'internship_descriptions']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate business domain score.

        Args:
            evidence_data: dict with 'resume_text' (required) plus
                optional 'case_study_text', 'excel_test_score' (0-100),
                and 'internship_descriptions'.

        Returns:
            DomainScore with weighted 0-1 score and confidence.
        """
        start_time = time.time()
        features = {}

        # Resume-derived features.
        resume_text = evidence_data.get('resume_text', '')
        if resume_text:
            features['resume_keyword_score'] = self._analyze_resume_keywords(resume_text)
            features['internship_relevance'] = self._extract_internship_relevance(resume_text)
            features['business_depth'] = self._assess_business_depth(resume_text)
        else:
            features['resume_keyword_score'] = 0.0
            features['internship_relevance'] = 0.0
            features['business_depth'] = 0.0

        # Case study analysis.
        case_study = evidence_data.get('case_study_text', '')
        features['case_study_score'] = self._analyze_case_study(case_study) if case_study else 0.0

        # Excel test score: normalize 0-100 to 0-1. Fixed: negative
        # inputs previously produced a negative feature; now clamped.
        excel_score = evidence_data.get('excel_test_score', 0)
        features['excel_test_score'] = min(max(excel_score, 0) / 100, 1.0) if excel_score else 0.0

        # Weighted sum over all features (iterate items, not keys).
        score = sum(value * self.feature_weights[name] for name, value in features.items())

        # Confidence from data completeness.
        confidence = self.calculate_confidence(evidence_data)

        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='business',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_resume_keywords(self, resume_text: str) -> float:
        """
        ATS-style keyword matching for business roles.

        Each category's score is the fraction of its keywords found in
        the resume; category scores are combined with fixed weights.

        Returns: 0-1 score based on keyword density and relevance
        """
        text_lower = resume_text.lower()

        # Count keywords in each category.
        category_scores = {}
        for category, keywords in self.business_keywords.items():
            matches = sum(1 for kw in keywords if kw in text_lower)
            category_scores[category] = min(matches / len(keywords), 1.0)

        # Category weights sum to 1.0; core business skills weigh more.
        weights = {
            'consulting': 0.20,
            'finance': 0.20,
            'analytics': 0.20,
            'management': 0.15,
            'sales': 0.15,
            'operations': 0.10
        }

        score = sum(category_scores.get(cat, 0) * weight for cat, weight in weights.items())

        logger.info(f"Resume keyword score: {score:.2f} (categories: {category_scores})")
        return score

    def _extract_internship_relevance(self, resume_text: str) -> float:
        """
        Extract and score internship relevance to business.

        Note: re.findall yields the full match string for patterns
        without a capture group; both cases are handled below.

        Returns: 0-1 score based on business-related internships
        """
        text_lower = resume_text.lower()

        # Internship indicators.
        internship_patterns = [
            r'intern(?:ship)?\s+at\s+([^\n]+)',
            r'(?:summer|winter)\s+intern',
            r'([a-z\s]+)\s+intern'
        ]

        internship_mentions = []
        for pattern in internship_patterns:
            matches = re.findall(pattern, text_lower)
            internship_mentions.extend(matches)

        if not internship_mentions:
            return 0.0

        # 0.2 per business-category hit across the first five mentions.
        business_internship_score = 0.0
        for mention in internship_mentions[:5]:
            mention_text = mention if isinstance(mention, str) else ' '.join(mention)
            for category, keywords in self.business_keywords.items():
                if any(kw in mention_text for kw in keywords):
                    business_internship_score += 0.2

        score = min(business_internship_score, 1.0)
        logger.info(f"Internship relevance: {score:.2f}")
        return score

    def _assess_business_depth(self, resume_text: str) -> float:
        """
        Assess overall business knowledge depth.

        Returns: 0-1 score based on advanced business terms found
        (10 or more distinct terms saturates the score).
        """
        text_lower = resume_text.lower()

        # Advanced business terms.
        advanced_terms = [
            'financial modeling', 'valuation', 'dcf', 'market research',
            'competitive analysis', 'business plan', 'roi', 'kpi',
            'p&l', 'balance sheet', 'cash flow', 'stakeholder management',
            'go-to-market', 'pricing strategy', 'market segmentation'
        ]

        term_count = sum(1 for term in advanced_terms if term in text_lower)
        score = min(term_count / 10, 1.0)  # 10+ terms = max

        logger.info(f"Business depth score: {score:.2f} ({term_count} advanced terms)")
        return score

    def _analyze_case_study(self, case_study_text: str) -> float:
        """
        Analyze case study submission quality.

        Structure keywords contribute up to 0.4, analytical terms up to
        0.3, and length (a rough quality proxy) up to 0.3.

        Returns: 0-1 score based on structure and depth
        """
        if not case_study_text or len(case_study_text) < 100:
            return 0.0

        score = 0.0
        text_lower = case_study_text.lower()

        # Structure indicators.
        structure_keywords = ['problem', 'analysis', 'solution', 'recommendation', 'conclusion']
        structure_score = sum(0.1 for kw in structure_keywords if kw in text_lower)
        score += min(structure_score, 0.4)

        # Analytical depth.
        analytical_terms = ['data', 'metric', 'assumption', 'framework', 'hypothesis', 'evidence']
        analytical_score = sum(0.05 for term in analytical_terms if term in text_lower)
        score += min(analytical_score, 0.3)

        # Length (quality proxy).
        length_score = min(len(case_study_text) / 2000, 0.3)  # 2000+ chars = max
        score += length_score

        logger.info(f"Case study score: {score:.2f}")
        return min(score, 1.0)
services/domain_plugins/creative_plugin.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Creative/Design Domain Plugin
2
+
3
+ Scores creative competency based on:
4
+ - Portfolio links (Behance, Dribbble, personal site)
5
+ - Project diversity and quality
6
+ - Design tool proficiency
7
+ - Visual content analysis
8
+ """
9
+ import re
10
+ import time
11
+ import logging
12
+ import requests
13
+ from typing import Dict, List
14
+ from .base_plugin import BaseDomainPlugin, DomainScore
15
+ from .plugin_factory import register_plugin
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@register_plugin('creative')
class CreativePlugin(BaseDomainPlugin):
    """Creative/Design domain scoring plugin.

    Scores portfolio accessibility, design-platform presence, tool
    proficiency, and project diversity/description depth into a
    weighted 0-1 score. Makes network HEAD requests to verify URLs.
    """

    def __init__(self):
        super().__init__()
        # Design tools and platforms used for keyword matching.
        self.design_tools = [
            'figma', 'sketch', 'adobe xd', 'photoshop', 'illustrator',
            'after effects', 'premiere pro', 'blender', 'cinema 4d'
        ]
        self.portfolio_platforms = ['behance', 'dribbble', 'artstation', 'deviantart']

    def _get_domain_type(self) -> str:
        return 'creative'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Weights sum to 1.0; the portfolio itself dominates.
        return {
            'portfolio_quality': 0.35,
            'project_diversity': 0.25,
            'tool_proficiency': 0.20,
            'platform_presence': 0.15,
            'description_depth': 0.05
        }

    def get_required_fields(self) -> List[str]:
        return ['portfolio_url']

    def get_optional_fields(self) -> List[str]:
        return ['behance_url', 'dribbble_url', 'design_tools_text', 'project_description']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate creative domain score.

        Args:
            evidence_data: dict with 'portfolio_url' (required) plus
                optional 'behance_url', 'dribbble_url',
                'design_tools_text', and 'project_description'.

        Returns:
            DomainScore with weighted 0-1 score and confidence.
        """
        start_time = time.time()
        features = {}

        # Portfolio analysis (network check).
        portfolio_url = evidence_data.get('portfolio_url', '')
        if portfolio_url:
            features['portfolio_quality'] = self._analyze_portfolio_quality(portfolio_url)
        else:
            features['portfolio_quality'] = 0.0

        # Platform presence.
        behance_url = evidence_data.get('behance_url', '')
        dribbble_url = evidence_data.get('dribbble_url', '')
        features['platform_presence'] = self._check_platform_presence(behance_url, dribbble_url)

        # Tool proficiency.
        tools_text = evidence_data.get('design_tools_text', '')
        features['tool_proficiency'] = self._assess_tool_proficiency(tools_text)

        # Project diversity and description depth.
        project_desc = evidence_data.get('project_description', '')
        features['project_diversity'] = self._assess_project_diversity(project_desc)
        features['description_depth'] = self._assess_description_depth(project_desc)

        # Weighted sum over all features.
        score = sum(value * self.feature_weights[name] for name, value in features.items())

        confidence = self.calculate_confidence(evidence_data)

        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='creative',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_portfolio_quality(self, portfolio_url: str) -> float:
        """
        Analyze portfolio website quality via a HEAD request.

        Returns: 0-1 score based on accessibility and professionalism;
        0.2 when the site is unreachable or returns a non-200 status.
        """
        try:
            if not portfolio_url.startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            response = requests.head(portfolio_url, timeout=5, allow_redirects=True)

            if response.status_code == 200:
                score = 0.6  # Base score for accessible portfolio

                # Bonus for professional platforms
                if any(platform in portfolio_url for platform in self.portfolio_platforms):
                    score += 0.2

                # Bonus for custom domain (not a known free host)
                if not any(free in portfolio_url for free in ['github.io', 'wixsite', 'wordpress.com']):
                    score += 0.2

                logger.info(f"Portfolio quality: {score:.2f}")
                return min(score, 1.0)
            else:
                return 0.2

        except Exception as e:
            logger.error(f"Error analyzing portfolio: {e}")
            return 0.2

    def _check_platform_presence(self, behance_url: str, dribbble_url: str) -> float:
        """
        Check presence on design platforms (Behance/Dribbble).

        Fixed: the two handlers below were bare ``except:`` clauses,
        which also swallowed KeyboardInterrupt/SystemExit; they now
        catch only requests' network/URL errors.

        Returns: 0-1 score based on platform profiles
        """
        score = 0.0

        # Behance presence
        if behance_url and 'behance.net' in behance_url:
            try:
                response = requests.head(behance_url, timeout=5, allow_redirects=True)
                if response.status_code == 200:
                    score += 0.5
            except requests.RequestException:
                score += 0.2  # Partial credit for providing URL

        # Dribbble presence
        if dribbble_url and 'dribbble.com' in dribbble_url:
            try:
                response = requests.head(dribbble_url, timeout=5, allow_redirects=True)
                if response.status_code == 200:
                    score += 0.5
            except requests.RequestException:
                score += 0.2

        logger.info(f"Platform presence: {score:.2f}")
        return min(score, 1.0)

    def _assess_tool_proficiency(self, tools_text: str) -> float:
        """
        Assess design tool proficiency from free text.

        Returns: 0-1 score based on tool mentions (5+ tools saturates),
        with a +0.2 bonus for professional tools (Adobe, Figma, Sketch).
        """
        if not tools_text:
            return 0.0

        text_lower = tools_text.lower()

        # Count tool mentions.
        tool_count = sum(1 for tool in self.design_tools if tool in text_lower)

        # Score based on tool diversity.
        score = min(tool_count / 5, 1.0)  # 5+ tools = max

        # Bonus for professional tools (Adobe, Figma).
        pro_tools = ['figma', 'adobe', 'sketch']
        if any(tool in text_lower for tool in pro_tools):
            score = min(score + 0.2, 1.0)

        logger.info(f"Tool proficiency: {score:.2f} ({tool_count} tools)")
        return score

    def _assess_project_diversity(self, project_desc: str) -> float:
        """
        Assess project type diversity.

        Returns: 0-1 score based on distinct project categories
        mentioned (6+ categories saturates).
        """
        if not project_desc:
            return 0.0

        text_lower = project_desc.lower()

        # Project type categories.
        project_types = [
            'ui design', 'ux design', 'branding', 'logo', 'illustration',
            'animation', '3d', 'web design', 'mobile app', 'poster',
            'packaging', 'typography', 'infographic', 'video editing'
        ]

        type_count = sum(1 for ptype in project_types if ptype in text_lower)
        score = min(type_count / 6, 1.0)  # 6+ types = max

        logger.info(f"Project diversity: {score:.2f} ({type_count} types)")
        return score

    def _assess_description_depth(self, project_desc: str) -> float:
        """
        Assess depth of project descriptions.

        Returns: 0-1 score proportional to length (1000+ chars = max;
        under 50 chars scores 0).
        """
        if not project_desc or len(project_desc) < 50:
            return 0.0

        score = min(len(project_desc) / 1000, 1.0)  # 1000+ chars = max

        logger.info(f"Description depth: {score:.2f}")
        return score
services/domain_plugins/plugin_factory.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Plugin Factory for Domain-Specific Scoring"""
2
+ import logging
3
+ from typing import Dict, Optional, List
4
+ from .base_plugin import BaseDomainPlugin
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class DomainPluginFactory:
    """Registry and factory for domain scoring plugins.

    Plugin classes are registered under a domain-type key and
    instantiated lazily, one singleton instance per domain.
    """

    _plugins: Dict[str, type] = {}
    _instances: Dict[str, BaseDomainPlugin] = {}  # Lazily-built singletons

    @classmethod
    def register(cls, domain_type: str, plugin_class: type):
        """Register a plugin class under *domain_type*."""
        if not issubclass(plugin_class, BaseDomainPlugin):
            raise TypeError(f"{plugin_class} must inherit from BaseDomainPlugin")

        cls._plugins[domain_type] = plugin_class
        logger.info(f"Registered domain plugin: {domain_type}")

    @classmethod
    def get_plugin(cls, domain_type: str) -> Optional[BaseDomainPlugin]:
        """Return the singleton plugin for *domain_type*, or None if unknown."""
        plugin_class = cls._plugins.get(domain_type)
        if plugin_class is None:
            logger.warning(f"Plugin not found: {domain_type}")
            return None

        # Build and cache the instance on first request.
        if domain_type not in cls._instances:
            cls._instances[domain_type] = plugin_class()
        return cls._instances[domain_type]

    @classmethod
    def list_available_domains(cls) -> List[str]:
        """All registered domain-type keys."""
        return list(cls._plugins.keys())

    @classmethod
    def is_domain_available(cls, domain_type: str) -> bool:
        """True when a plugin is registered for *domain_type*."""
        return domain_type in cls._plugins

    @classmethod
    def get_domain_info(cls, domain_type: str) -> Optional[Dict]:
        """Describe a registered plugin's fields and weights, or None."""
        plugin = cls.get_plugin(domain_type)
        if plugin is None:
            return None

        info = {
            'domain_type': plugin.domain_type,
            'required_fields': plugin.get_required_fields(),
            'optional_fields': plugin.get_optional_fields(),
            'feature_weights': plugin.feature_weights
        }
        return info

    @classmethod
    def clear_cache(cls):
        """Drop cached singleton instances (useful for testing)."""
        cls._instances.clear()
65
+
66
+
67
# Auto-registration helper decorator
def register_plugin(domain_type: str):
    """Class decorator registering a plugin with DomainPluginFactory."""
    def _wrap(plugin_class):
        DomainPluginFactory.register(domain_type, plugin_class)
        return plugin_class
    return _wrap
services/domain_plugins/research_plugin.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Research/Science Domain Plugin
2
+
3
+ Scores research competency based on:
4
+ - Publication record (papers, citations)
5
+ - Lab experience and duration
6
+ - Research project depth
7
+ - Thesis/dissertation summaries
8
+ """
9
+ import re
10
+ import time
11
+ import logging
12
+ from typing import Dict, List
13
+ from .base_plugin import BaseDomainPlugin, DomainScore
14
+ from .plugin_factory import register_plugin
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_plugin('research')
class ResearchPlugin(BaseDomainPlugin):
    """Research/Science domain scoring plugin.

    Aggregates publication record, lab experience, research-methodology
    depth, and thesis quality into a weighted 0-1 domain score.
    """

    def __init__(self):
        super().__init__()
        # Research indicators used by the keyword analyzers below.
        self.publication_venues = [
            'journal', 'conference', 'proceedings', 'ieee', 'acm',
            'springer', 'elsevier', 'nature', 'science', 'arxiv'
        ]
        self.research_methods = [
            'experiment', 'methodology', 'hypothesis', 'literature review',
            'data collection', 'statistical analysis', 'simulation', 'survey'
        ]

    def _get_domain_type(self) -> str:
        return 'research'

    def _get_feature_weights(self) -> Dict[str, float]:
        # Weights sum to 1.0; publications carry the most weight.
        return {
            'publication_score': 0.35,
            'lab_experience_score': 0.25,
            'research_depth_score': 0.25,
            'thesis_quality_score': 0.15
        }

    def get_required_fields(self) -> List[str]:
        return ['research_description']

    def get_optional_fields(self) -> List[str]:
        return ['publications_text', 'lab_experience_text', 'thesis_summary']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate research domain score."""
        started = time.time()

        # Each analyzer returns 0.0 for missing/short input, so the raw
        # (possibly empty) strings can be fed straight through.
        features = {
            'publication_score': self._analyze_publications(evidence_data.get('publications_text', '')),
            'lab_experience_score': self._analyze_lab_experience(evidence_data.get('lab_experience_text', '')),
            'research_depth_score': self._analyze_research_depth(evidence_data.get('research_description', '')),
            'thesis_quality_score': self._analyze_thesis(evidence_data.get('thesis_summary', '')),
        }

        weighted = sum(features[name] * self.feature_weights[name] for name in features)
        confidence = self.calculate_confidence(evidence_data)

        return DomainScore(
            domain_type='research',
            score=min(weighted, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=(time.time() - started) * 1000
        )

    def _analyze_publications(self, publications_text: str) -> float:
        """Score publication record from count and venue mentions (0-1)."""
        if not publications_text or len(publications_text) < 30:
            return 0.0

        lowered = publications_text.lower()

        # Estimate publication count: best-matching pattern among quoted
        # titles, numbered references, and year prefixes.
        patterns = (
            r'"([^"]+)"',   # Quoted titles
            r'\[\d+\]',     # Numbered references
            r'\d{4}\.\s',   # Year format (2023. Title...)
        )
        pub_count = max(len(re.findall(p, publications_text)) for p in patterns)

        total = min(pub_count / 5, 0.6)  # 5+ pubs caps at 0.6

        # Venue quality bonus.
        venue_count = len([v for v in self.publication_venues if v in lowered])
        total += min(venue_count / 3, 0.4)  # 3+ venues caps at 0.4

        logger.info(f"Publication score: {total:.2f} ({pub_count} pubs, {venue_count} venues)")
        return min(total, 1.0)

    def _analyze_lab_experience(self, lab_text: str) -> float:
        """Score lab experience from duration and quality markers (0-1)."""
        if not lab_text or len(lab_text) < 30:
            return 0.0

        lowered = lab_text.lower()

        # Longest stated duration, normalized to months.
        months = 0
        for pattern, factor in ((r'(\d+)\s*years?', 12), (r'(\d+)\s*months?', 1)):
            for hit in re.findall(pattern, lowered):
                months = max(months, int(hit) * factor)

        total = min(months / 12, 0.5)  # a full year caps the duration share

        # Lab quality indicators.
        quality_keywords = ['research lab', 'professor', 'phd', 'equipment', 'experiment', 'protocol']
        quality_count = len([kw for kw in quality_keywords if kw in lowered])
        total += min(quality_count / 4, 0.5)

        logger.info(f"Lab experience: {total:.2f} ({months} months)")
        return min(total, 1.0)

    def _analyze_research_depth(self, research_desc: str) -> float:
        """Score methodology sophistication of the description (0-1)."""
        if not research_desc or len(research_desc) < 50:
            return 0.0

        lowered = research_desc.lower()

        # Research method mentions.
        method_count = len([m for m in self.research_methods if m in lowered])
        total = min(method_count / 4, 0.5)

        # Technical depth indicators.
        technical_terms = [
            'algorithm', 'model', 'framework', 'dataset', 'validation',
            'baseline', 'benchmark', 'evaluation', 'metrics', 'results'
        ]
        tech_count = len([t for t in technical_terms if t in lowered])
        total += min(tech_count / 5, 0.3)

        # Length as a depth proxy.
        total += min(len(research_desc) / 1000, 0.2)

        logger.info(f"Research depth: {total:.2f}")
        return min(total, 1.0)

    def _analyze_thesis(self, thesis_text: str) -> float:
        """Score thesis/dissertation summary structure and rigor (0-1)."""
        if not thesis_text or len(thesis_text) < 100:
            return 0.0

        lowered = thesis_text.lower()

        # Thesis structure keywords.
        structure_keywords = [
            'abstract', 'introduction', 'methodology', 'results',
            'discussion', 'conclusion', 'references', 'chapter'
        ]
        structure_hits = len([kw for kw in structure_keywords if kw in lowered])
        total = min(structure_hits / 5, 0.5)

        # Academic rigor indicators.
        rigor_keywords = [
            'research question', 'objective', 'contribution', 'limitation',
            'future work', 'significance', 'novelty', 'finding'
        ]
        rigor_hits = len([kw for kw in rigor_keywords if kw in lowered])
        total += min(rigor_hits / 4, 0.3)

        # Length bonus.
        total += min(len(thesis_text) / 2000, 0.2)

        logger.info(f"Thesis quality: {total:.2f}")
        return min(total, 1.0)
services/domain_plugins/tech_plugin.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tech/CS Domain Plugin
2
+
3
+ Scores technical competency based on:
4
+ - GitHub activity (commits, repos, stars, descriptions)
5
+ - LeetCode profile (problems solved, ranking)
6
+ - Portfolio links (project depth analysis)
7
+ """
8
+ import re
9
+ import time
10
+ import logging
11
+ import requests
12
+ from typing import Dict, List
13
+ from .base_plugin import BaseDomainPlugin, DomainScore
14
+ from .plugin_factory import register_plugin
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_plugin('tech')
class TechPlugin(BaseDomainPlugin):
    """Technical domain scoring plugin.

    Combines GitHub profile activity, repository quality, commit recency,
    LeetCode presence (placeholder) and portfolio reachability into one
    weighted tech-competency score.

    NOTE(review): all GitHub requests are unauthenticated (60 req/hour per
    IP rate limit) — consider an auth token plus response caching before
    production use.
    """

    def _get_domain_type(self) -> str:
        """Domain identifier used by the plugin registry."""
        return 'tech'

    def _get_feature_weights(self) -> Dict[str, float]:
        """Per-feature weights (sum to 1.0); keys match features in score()."""
        return {
            'github_activity_score': 0.30,
            'github_repo_quality': 0.20,
            'leetcode_score': 0.25,
            'portfolio_depth': 0.15,
            'recent_activity': 0.10
        }

    def get_required_fields(self) -> List[str]:
        return ['github_url']  # At least GitHub is required

    def get_optional_fields(self) -> List[str]:
        return ['leetcode_handle', 'portfolio_url', 'linkedin_url']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate tech domain score.

        Args:
            evidence_data: dict that may contain 'github_url',
                'leetcode_handle' and 'portfolio_url'.

        Returns:
            DomainScore with the weighted score (capped at 1.0), confidence,
            raw per-feature values, and processing time in milliseconds.
        """
        start_time = time.time()
        features = {}

        # GitHub analysis — all three GitHub features default to 0 when absent
        github_url = evidence_data.get('github_url', '')
        if github_url:
            features['github_activity_score'] = self._analyze_github_activity(github_url)
            features['github_repo_quality'] = self._analyze_repo_quality(github_url)
            features['recent_activity'] = self._check_recent_commits(github_url)
        else:
            features['github_activity_score'] = 0.0
            features['github_repo_quality'] = 0.0
            features['recent_activity'] = 0.0

        # LeetCode analysis (placeholder heuristic — see _analyze_leetcode)
        leetcode_handle = evidence_data.get('leetcode_handle', '')
        features['leetcode_score'] = (
            self._analyze_leetcode(leetcode_handle) if leetcode_handle else 0.0
        )

        # Portfolio analysis (reachability check)
        portfolio_url = evidence_data.get('portfolio_url', '')
        features['portfolio_depth'] = (
            self._analyze_portfolio(portfolio_url) if portfolio_url else 0.0
        )

        # Weighted sum of all features
        score = sum(features[k] * self.feature_weights[k] for k in features)

        confidence = self.calculate_confidence(evidence_data)
        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='tech',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_github_activity(self, github_url: str) -> float:
        """Score profile-level GitHub activity.

        Returns 0-1 from public repo count, followers and follower+following
        engagement. Falls back to 0.3 on API errors so a transient network
        failure does not zero out a provided profile.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            api_url = f"https://api.github.com/users/{username}"
            headers = {'Accept': 'application/vnd.github.v3+json'}

            response = requests.get(api_url, headers=headers, timeout=5)

            if response.status_code != 200:
                logger.warning(f"GitHub API error for {username}: {response.status_code}")
                return 0.3  # Fallback score if API fails

            data = response.json()

            public_repos = data.get('public_repos', 0)
            followers = data.get('followers', 0)
            following = data.get('following', 0)

            # Simple heuristic: each component saturates at its cap.
            repo_score = min(public_repos / 20, 1.0) * 0.5       # 20+ repos = max
            follower_score = min(followers / 50, 1.0) * 0.3      # 50+ followers = max
            engagement_score = min((followers + following) / 100, 1.0) * 0.2

            total_score = repo_score + follower_score + engagement_score

            logger.info(f"GitHub activity for {username}: {total_score:.2f}")
            return total_score

        except Exception as e:
            logger.error(f"Error analyzing GitHub activity: {e}")
            return 0.3  # Fallback score

    def _analyze_repo_quality(self, github_url: str) -> float:
        """Score quality of the user's most-starred repositories.

        FIX: the previous request used '?sort=stars', which is not a valid
        sort for the list-user-repos endpoint (only created/updated/pushed/
        full_name), so GitHub silently returned the default ordering. We now
        fetch up to 100 repos and rank by stargazers_count client-side.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            api_url = f"https://api.github.com/users/{username}/repos?per_page=100"
            headers = {'Accept': 'application/vnd.github.v3+json'}

            response = requests.get(api_url, headers=headers, timeout=5)

            if response.status_code != 200:
                return 0.3

            repos = response.json()

            if not repos:
                return 0.0

            # Rank by stars locally, then analyze the top 5
            repos.sort(key=lambda r: r.get('stargazers_count', 0), reverse=True)
            top = repos[:5]

            total_stars = sum(r.get('stargazers_count', 0) for r in top)
            total_forks = sum(r.get('forks_count', 0) for r in top)
            has_descriptions = sum(1 for r in top if r.get('description'))
            # NOTE(review): has_wiki/has_pages are documentation proxies, not
            # actual READMEs (this endpoint does not expose README presence).
            has_docs = sum(1 for r in top if r.get('has_wiki') or r.get('has_pages'))

            star_score = min(total_stars / 50, 1.0) * 0.4   # 50+ stars = max
            fork_score = min(total_forks / 20, 1.0) * 0.2   # 20+ forks = max
            desc_score = (has_descriptions / 5) * 0.2
            docs_score = (has_docs / 5) * 0.2

            total_score = star_score + fork_score + desc_score + docs_score

            logger.info(f"GitHub repo quality for {username}: {total_score:.2f}")
            return total_score

        except Exception as e:
            logger.error(f"Error analyzing repo quality: {e}")
            return 0.3

    def _check_recent_commits(self, github_url: str) -> float:
        """Score recency of activity (push events within the last 90 days).

        FIX: GitHub timestamps are UTC ('...Z'); the previous code compared
        them against naive local datetime.now(), skewing the 90-day window by
        the machine's UTC offset. Both sides are now timezone-aware UTC.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            api_url = f"https://api.github.com/users/{username}/events/public?per_page=30"
            headers = {'Accept': 'application/vnd.github.v3+json'}

            response = requests.get(api_url, headers=headers, timeout=5)

            if response.status_code != 200:
                return 0.5  # Neutral fallback

            events = response.json()

            from datetime import datetime, timedelta, timezone
            cutoff = datetime.now(timezone.utc) - timedelta(days=90)

            recent_commits = 0
            for event in events:
                if event.get('type') == 'PushEvent':
                    created_at = datetime.strptime(
                        event['created_at'], '%Y-%m-%dT%H:%M:%SZ'
                    ).replace(tzinfo=timezone.utc)
                    if created_at > cutoff:
                        recent_commits += 1

            # Score based on commit frequency
            score = min(recent_commits / 20, 1.0)  # 20+ commits in 90 days = max

            logger.info(f"Recent activity for {username}: {score:.2f} ({recent_commits} commits)")
            return score

        except Exception as e:
            logger.error(f"Error checking recent activity: {e}")
            return 0.5

    def _analyze_leetcode(self, leetcode_handle: str) -> float:
        """
        Analyze LeetCode profile.

        Returns: 0-1 score based on problems solved and ranking.

        Note: LeetCode has no official public API, so this is a heuristic
        placeholder. In production, consider an unofficial API or scraping
        with proper rate limiting.
        """
        try:
            # Placeholder: a provided handle earns a neutral score.
            logger.info(f"LeetCode handle provided: {leetcode_handle}")
            return 0.5  # Neutral score when handle exists

        except Exception as e:
            logger.error(f"Error analyzing LeetCode: {e}")
            return 0.0

    def _analyze_portfolio(self, portfolio_url: str) -> float:
        """
        Analyze portfolio website reachability.

        Returns 0-1: 0.7 for a reachable site, +0.3 bonus for a custom
        domain, 0.2 partial credit for an unreachable-but-provided URL.

        NOTE(review): some hosts reject HEAD requests; a GET fallback would
        reduce false negatives — confirm before relying on the 0.2 path.
        """
        try:
            # Basic URL normalization
            if not portfolio_url.startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            # Check if URL is accessible
            response = requests.head(portfolio_url, timeout=5, allow_redirects=True)

            if response.status_code == 200:
                # Portfolio exists and is accessible
                score = 0.7

                # Bonus for custom domain (not github.io, netlify.app, etc.)
                if not any(host in portfolio_url for host in ['github.io', 'netlify.app', 'vercel.app', 'repl.it']):
                    score += 0.3

                logger.info(f"Portfolio accessible: {portfolio_url} (score: {score})")
                return min(score, 1.0)
            else:
                logger.warning(f"Portfolio not accessible: {portfolio_url}")
                return 0.2  # Some credit for providing URL

        except Exception as e:
            logger.error(f"Error analyzing portfolio: {e}")
            return 0.2

    def _extract_github_username(self, github_url: str) -> str:
        """Extract username from a GitHub URL; '' when no match.

        Handles formats: https://github.com/username or github.com/username.
        """
        pattern = r'github\.com/([a-zA-Z0-9_-]+)'
        match = re.search(pattern, github_url)
        return match.group(1) if match else ''
services/fidelity_transformer.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fidelity Criteria Transformer - Enhanced aspect extraction with semantic validation
3
+ """
4
+ import os
5
+ import logging
6
+ import numpy as np
7
+ from typing import Dict, List, Tuple, Optional, Any
8
+ from dataclasses import dataclass
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Try importing sentence transformers
13
+ try:
14
+ from sentence_transformers import SentenceTransformer
15
+ HAS_SBERT = True
16
+ except ImportError:
17
+ HAS_SBERT = False
18
+ logger.warning("sentence-transformers not installed, using fallback")
19
+
20
+ # Try importing domain knowledge base
21
+ try:
22
+ from .domain_knowledge_base import get_domain_knowledge_base, DomainKnowledgeBase
23
+ HAS_DKB = True
24
+ except ImportError:
25
+ HAS_DKB = False
26
+ logger.warning("DomainKnowledgeBase not available")
27
+
28
+
29
@dataclass
class FidelityScore:
    """Fidelity assessment result produced by FidelityScorer.score()."""
    score: float      # 0-1 overall fidelity: 0.30*depth + 0.35*coherence + 0.35*coverage
    coherence: float  # 0-1 semantic coherence (sentence structure, vocabulary diversity)
    coverage: float   # 0-1 coverage of expected aspects (or strongest-aspect proxy)
    depth: float      # 0-1 content depth derived from word count
    issues: List[str]  # Human-readable problems detected (short text, generic phrases, ...)
37
+
38
+
39
@dataclass
class AspectExtractionResult:
    """Result of aspect extraction for a single text."""
    aspects: Dict[str, float]              # aspect name -> 0-1 similarity score
    chunk_evidence: Dict[str, List[str]]   # aspect name -> text chunks supporting it
    fidelity: FidelityScore                # quality assessment of the analyzed text
    detected_domain: str                   # domain id, or 'general' when undetected
    domain_confidence: float               # 0-1 confidence of the domain detection
47
+
48
+
49
class FidelityScorer:
    """
    Semantic fidelity scoring for text responses.

    Validates response quality against expected patterns: content depth
    (word count), coherence (sentence structure, vocabulary diversity,
    generic-phrase penalty) and aspect coverage.
    """

    # Generic/copy-paste patterns to detect
    GENERIC_PATTERNS = [
        "i am a hard worker",
        "i have good communication skills",
        "i am a team player",
        "i want to learn and grow",
        "i am passionate about",
        "looking for opportunities",
        "seeking challenging role"
    ]

    # Minimum thresholds
    MIN_WORD_COUNT = 30
    IDEAL_WORD_COUNT = 150
    MAX_WORD_COUNT = 500

    def __init__(self):
        self.generic_patterns = [p.lower() for p in self.GENERIC_PATTERNS]

    def score(self, text: str, aspect_scores: Dict[str, float],
              expected_aspects: List[str] = None) -> FidelityScore:
        """
        Compute fidelity score for a text response.

        Args:
            text: The text to evaluate.
            aspect_scores: Scores from aspect extraction.
            expected_aspects: Aspects expected in this response.

        Returns:
            FidelityScore with detailed breakdown (depth/coherence/coverage
            blended 0.30/0.35/0.35, plus a list of detected issues).
        """
        issues = []

        # Guard: nothing substantive to evaluate.
        if not text or len(text.strip()) < 10:
            return FidelityScore(
                score=0.0, coherence=0.0, coverage=0.0, depth=0.0,
                issues=["Response is too short or empty"]
            )

        lowered = text.lower()
        n_words = len(text.split())

        # --- 1. Content depth, from word count ---
        if n_words < self.MIN_WORD_COUNT:
            depth = 0.2
            issues.append(f"Response too short ({n_words} words, minimum {self.MIN_WORD_COUNT})")
        elif n_words < self.IDEAL_WORD_COUNT:
            # Linear ramp from 0.5 up to 0.8 between MIN and IDEAL
            depth = 0.5 + 0.3 * (n_words - self.MIN_WORD_COUNT) / (self.IDEAL_WORD_COUNT - self.MIN_WORD_COUNT)
        elif n_words <= self.MAX_WORD_COUNT:
            depth = 1.0
        else:
            depth = 0.9  # Slightly penalize overly long responses
            issues.append("Response is longer than recommended")

        # --- 2. Generic/copy-paste phrase detection ---
        n_generic = sum(p in lowered for p in self.generic_patterns)
        penalty = min(n_generic * 0.1, 0.4)
        if n_generic > 2:
            issues.append(f"Contains {n_generic} generic phrases")

        # --- 3. Coherence: sentence structure and vocabulary diversity ---
        sentences = [frag.strip() for frag in text.split('.') if len(frag.strip()) > 10]
        if len(sentences) < 2:
            coherence = 0.4
            issues.append("Response lacks proper sentence structure")
        else:
            tokens = lowered.split()
            unique_ratio = len(set(tokens)) / max(len(tokens), 1)
            coherence = 0.6 + 0.2 * unique_ratio

        # Apply the generic-phrase penalty, floored at 0.2
        coherence = max(0.2, coherence - penalty)

        # --- 4. Aspect coverage ---
        if expected_aspects and aspect_scores:
            covered = sum(aspect_scores.get(a, 0) > 0.4 for a in expected_aspects)
            coverage = covered / len(expected_aspects)
            if coverage < 0.5:
                issues.append(f"Only {covered}/{len(expected_aspects)} expected aspects covered")
        elif aspect_scores:
            # No expectations given: having any one strong aspect is good
            coverage = min(max(aspect_scores.values()) + 0.2, 1.0)
        else:
            coverage = 0.3

        # --- 5. Overall fidelity blend ---
        overall = (
            0.30 * depth +
            0.35 * coherence +
            0.35 * coverage
        )

        return FidelityScore(
            score=round(overall, 3),
            coherence=round(coherence, 3),
            coverage=round(coverage, 3),
            depth=round(depth, 3),
            issues=issues
        )
165
+
166
+
167
class FidelityCriteriaTransformer:
    """
    Enhanced aspect extraction with domain-aware prototypes and fidelity validation.

    Key improvements over TextModuleV2:
    1. Domain-specific aspect prototypes from DomainKnowledgeBase
    2. Fidelity scoring for response quality validation
    3. Unified extraction interface with rich output

    Degrades gracefully: without sentence-transformers the encoder is None and
    extraction returns empty aspect maps; without the DomainKnowledgeBase the
    domain falls back to 'general' with built-in seed phrases.
    """

    def __init__(self,
                 model_name: str = None,
                 domains_dir: str = None,
                 use_gpu: bool = False):
        """
        Initialize FCT.

        Args:
            model_name: Sentence transformer model name; defaults to the
                FCT_MODEL_NAME env var, then 'all-mpnet-base-v2'.
            domains_dir: Path to domain config directory (passed to DKB).
            use_gpu: Whether to use GPU ('cuda') for encoding.
        """
        self.model_name = model_name or os.getenv('FCT_MODEL_NAME', 'all-mpnet-base-v2')
        self.device = 'cuda' if use_gpu else 'cpu'

        # Initialize encoder; None means fallback mode (no aspect scoring)
        if HAS_SBERT:
            logger.info(f"Loading FCT model: {self.model_name}")
            self.encoder = SentenceTransformer(self.model_name, device=self.device)
        else:
            self.encoder = None
            logger.warning("Running in fallback mode without sentence transformers")

        # Initialize domain knowledge base (None when module unavailable)
        if HAS_DKB:
            self.dkb = get_domain_knowledge_base(domains_dir)
        else:
            self.dkb = None

        # Initialize fidelity scorer
        self.fidelity_scorer = FidelityScorer()

        # Cache for centroids (domain -> aspect -> centroid vector)
        self._centroid_cache: Dict[str, Dict[str, np.ndarray]] = {}

        # Default aspects (fallback when no domain detected)
        self.default_aspects = [
            'technical_skills', 'problem_solving', 'leadership',
            'communication', 'teamwork', 'initiative', 'learning_agility'
        ]

        logger.info("FidelityCriteriaTransformer initialized")

    def _get_centroids(self, domain_id: str,
                       aspect_seeds: Dict[str, List[str]]) -> Dict[str, np.ndarray]:
        """Get or compute unit-normalized embedding centroids for aspects.

        NOTE(review): the cache key is the domain id only — assumes the seed
        phrases for a given domain never change within a process; confirm if
        DKB configs can be reloaded at runtime.
        Returns {} when no encoder is available.
        """
        cache_key = domain_id

        if cache_key in self._centroid_cache:
            return self._centroid_cache[cache_key]

        if not self.encoder:
            return {}

        centroids = {}
        for aspect, seeds in aspect_seeds.items():
            if not seeds:
                continue

            # Encode seed phrases for this aspect
            embeddings = self.encoder.encode(seeds, convert_to_tensor=False,
                                             show_progress_bar=False)
            embeddings = np.array(embeddings, dtype=np.float32)

            # Compute normalized centroid (epsilon guards zero norm)
            centroid = np.mean(embeddings, axis=0)
            centroid = centroid / (np.linalg.norm(centroid) + 1e-8)
            centroids[aspect] = centroid

        self._centroid_cache[cache_key] = centroids
        return centroids

    def _split_text(self, text: str, max_chunks: int = 20) -> List[str]:
        """Split text into up to max_chunks chunks for similarity scoring.

        Sentence split first; falls back to a 50-word sliding window (step 25)
        when fewer than 3 usable sentences are found.
        """
        import re

        # Split by sentence-ending punctuation
        sentences = re.split(r'[.!?]+', text)
        chunks = [s.strip() for s in sentences if len(s.strip()) > 20]

        # If too few sentences, use sliding word windows instead
        if len(chunks) < 3:
            words = text.split()
            window_size = 50
            step = 25
            chunks = []
            for i in range(0, max(1, len(words) - window_size + 1), step):
                chunk = ' '.join(words[i:i+window_size])
                if len(chunk) > 20:
                    chunks.append(chunk)

        return chunks[:max_chunks]

    def extract_aspects(self,
                        text: str,
                        domain_hint: str = None,
                        skills: List[str] = None,
                        expected_aspects: List[str] = None) -> AspectExtractionResult:
        """
        Extract aspects from text with fidelity validation.

        Args:
            text: Text to analyze.
            domain_hint: Optional domain ID to use (trusted with confidence 1.0
                when it is a known DKB domain).
            skills: Optional list of skills for domain detection.
            expected_aspects: Optional list of expected aspects, forwarded to
                the fidelity scorer for coverage checks.

        Returns:
            AspectExtractionResult with per-aspect scores, evidence chunks,
            fidelity assessment and the detected domain.
        """
        # 1. Domain Detection — hint wins if valid, else DKB, else 'general'
        if domain_hint and self.dkb and domain_hint in self.dkb.list_domains():
            detected_domain = domain_hint
            domain_confidence = 1.0
        elif self.dkb:
            detected_domain, domain_confidence = self.dkb.detect_domain(text, skills)
        else:
            detected_domain = 'general'
            domain_confidence = 0.0

        # 2. Get aspect prototype seed phrases for the domain
        if self.dkb and detected_domain != 'general':
            aspect_seeds = self.dkb.get_aspect_prototypes_for_domain(detected_domain)
        else:
            aspect_seeds = {}

        # 3. Fallback to built-in minimal seeds when the DKB has none
        if not aspect_seeds:
            aspect_seeds = {
                'technical_skills': ['developed software', 'built systems', 'coded in python'],
                'problem_solving': ['solved problems', 'debugged issues', 'optimized performance'],
                'leadership': ['led team', 'managed project', 'organized event'],
                'communication': ['presented to', 'wrote documentation', 'explained to']
            }

        # 4. Get/compute centroids (cached per domain)
        centroids = self._get_centroids(detected_domain, aspect_seeds)

        # 5. Score text chunks against each aspect centroid
        aspect_scores = {}
        chunk_evidence = {aspect: [] for aspect in centroids.keys()}

        if not text or len(text) < 20 or not self.encoder:
            # No scorable text or no encoder: fidelity only, empty aspects
            fidelity = self.fidelity_scorer.score(text, {}, expected_aspects)
            return AspectExtractionResult(
                aspects={},
                chunk_evidence={},
                fidelity=fidelity,
                detected_domain=detected_domain,
                domain_confidence=domain_confidence
            )

        # Split and encode text
        chunks = self._split_text(text)
        if not chunks:
            fidelity = self.fidelity_scorer.score(text, {}, expected_aspects)
            return AspectExtractionResult(
                aspects={},
                chunk_evidence={},
                fidelity=fidelity,
                detected_domain=detected_domain,
                domain_confidence=domain_confidence
            )

        chunk_embeddings = self.encoder.encode(chunks, convert_to_tensor=False,
                                               show_progress_bar=False)
        chunk_embeddings = np.array(chunk_embeddings, dtype=np.float32)

        # Score each aspect against all chunks
        for aspect, centroid in centroids.items():
            # Cosine similarities between every chunk and the centroid
            sims = np.dot(chunk_embeddings, centroid) / (
                np.linalg.norm(chunk_embeddings, axis=1) * np.linalg.norm(centroid) + 1e-8
            )

            # Scoring: blend of best chunk and mean of top-k chunks
            max_sim = float(np.max(sims))
            top_k = 3
            topk_sims = np.partition(sims, -min(top_k, len(sims)))[-top_k:]
            mean_topk = float(np.mean(topk_sims))

            # Map cosine range [-1, 1] to [0, 1]
            raw_score = 0.6 * max_sim + 0.4 * mean_topk
            normalized = (raw_score + 1) / 2
            aspect_scores[aspect] = float(np.clip(normalized, 0, 1))

            # Collect chunks whose raw similarity clears the evidence threshold
            threshold = 0.35
            for i, sim in enumerate(sims):
                if sim > threshold:
                    chunk_evidence[aspect].append(chunks[i])

        # 6. Fidelity scoring over the full text and extracted aspects
        fidelity = self.fidelity_scorer.score(text, aspect_scores, expected_aspects)

        return AspectExtractionResult(
            aspects=aspect_scores,
            chunk_evidence=chunk_evidence,
            fidelity=fidelity,
            detected_domain=detected_domain,
            domain_confidence=domain_confidence
        )

    def score_student_text(self, text_responses: Dict[str, str],
                           domain_hint: str = None,
                           skills: List[str] = None) -> Dict[str, Any]:
        """
        Score all text responses for a student.

        Args:
            text_responses: Dict with keys text_q1, text_q2, text_q3.
            domain_hint: Optional domain ID.
            skills: Optional skills list for domain detection.

        Returns:
            Dict with 'score' (weighted aspect blend), 'confidence'
            (average fidelity), detected domain info, per-aspect scores,
            per-question fidelity, and all detected issues.
        """
        text_q1 = text_responses.get('text_q1', '')
        text_q2 = text_responses.get('text_q2', '')
        text_q3 = text_responses.get('text_q3', '')

        # Combined text (currently unused; domain comes from q1's extraction)
        combined_text = f"{text_q1} {text_q2} {text_q3}"

        # Expected aspects per question (used for fidelity coverage checks)
        q1_aspects = ['technical_skills', 'problem_solving', 'learning_agility']
        q2_aspects = ['career_alignment', 'initiative', 'learning_agility']
        q3_aspects = ['leadership', 'teamwork', 'communication']

        # Extract aspects per question
        q1_result = self.extract_aspects(text_q1, domain_hint, skills, q1_aspects)
        q2_result = self.extract_aspects(text_q2, domain_hint, skills, q2_aspects)
        q3_result = self.extract_aspects(text_q3, domain_hint, skills, q3_aspects)

        # Aggregate: best score across questions for each aspect
        all_aspects = {}
        for result in [q1_result, q2_result, q3_result]:
            for aspect, score in result.aspects.items():
                if aspect in all_aspects:
                    all_aspects[aspect] = max(all_aspects[aspect], score)
                else:
                    all_aspects[aspect] = score

        # Overall fidelity is the mean across the three questions
        avg_fidelity = np.mean([
            q1_result.fidelity.score,
            q2_result.fidelity.score,
            q3_result.fidelity.score
        ])

        # Fixed aspect weights for the final text score (sum to 1.0)
        weights = {
            'technical_skills': 0.15,
            'problem_solving': 0.10,
            'leadership': 0.20,
            'communication': 0.15,
            'teamwork': 0.10,
            'learning_agility': 0.10,
            'initiative': 0.10,
            'career_alignment': 0.10
        }

        # Missing aspects contribute a neutral 0.3 rather than 0
        weighted_score = sum(
            all_aspects.get(aspect, 0.3) * weight
            for aspect, weight in weights.items()
        )

        # Confidence based on fidelity
        confidence = avg_fidelity

        return {
            'score': round(weighted_score, 3),
            'confidence': round(confidence, 3),
            'detected_domain': q1_result.detected_domain,
            'domain_confidence': round(q1_result.domain_confidence, 3),
            'aspects': {k: round(v, 3) for k, v in all_aspects.items()},
            'fidelity': {
                'overall': round(avg_fidelity, 3),
                'q1': round(q1_result.fidelity.score, 3),
                'q2': round(q2_result.fidelity.score, 3),
                'q3': round(q3_result.fidelity.score, 3)
            },
            'issues': (
                q1_result.fidelity.issues +
                q2_result.fidelity.issues +
                q3_result.fidelity.issues
            )
        }
467
+
468
+
469
# Singleton — one process-wide transformer so the encoder model loads once
_fct_instance: Optional[FidelityCriteriaTransformer] = None


def get_fidelity_transformer(model_name: str = None,
                             domains_dir: str = None) -> FidelityCriteriaTransformer:
    """Get or create the singleton FCT instance.

    Args:
        model_name: Used only on the first call; later calls return the
            cached instance regardless of arguments.
        domains_dir: Same first-call-only semantics as model_name.

    Returns:
        The shared FidelityCriteriaTransformer instance.
    """
    global _fct_instance

    if _fct_instance is None:
        _fct_instance = FidelityCriteriaTransformer(model_name, domains_dir)

    return _fct_instance
services/fusion.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fusion Engine - Confidence-weighted Score Fusion"""
2
+ from typing import Dict, Tuple, Optional
3
+ from config import Config
4
+
5
class FusionEngine:
    """Combines scores from all modules with confidence weighting.

    Each module contributes (configured weight * module confidence); the
    weighted sum of module scores is normalized by the total effective
    weight, so a low-confidence module pulls the final score toward the
    other modules rather than toward zero.
    """

    def __init__(self):
        # Base weights (when no domain evidence)
        self.base_weights = {
            'universal': Config.UNIVERSAL_WEIGHT,
            'personality': Config.PERSONALITY_WEIGHT,
            'text': Config.TEXT_WEIGHT
        }

        # Extended weights (when domain evidence exists)
        self.extended_weights = {
            'universal': 0.30,   # Reduced from base
            'personality': 0.25,
            'text': 0.25,
            'domain': 0.20       # New domain component
        }

    def fuse_scores(
        self,
        universal_score: float,
        universal_confidence: float,
        personality_score: float,
        personality_confidence: float,
        text_score: float,
        text_confidence: float,
        domain_score: Optional[float] = None,
        domain_confidence: Optional[float] = None
    ) -> Tuple[float, Dict]:
        """
        Fuse module scores with confidence weighting.

        Supports an optional domain score for pluggable domain evidence;
        domain participates only when both domain_score and a positive
        domain_confidence are supplied.

        Returns:
            (final_score, breakdown) where breakdown holds component scores,
            confidences, normalized effective weights and the weight set used.
        """
        # Determine which weight set applies
        has_domain = (
            domain_score is not None
            and domain_confidence is not None
            and domain_confidence > 0
        )
        weights = self.extended_weights if has_domain else self.base_weights

        # Effective weight = configured weight scaled by module confidence
        effective_weights = {
            'universal': weights['universal'] * universal_confidence,
            'personality': weights['personality'] * personality_confidence,
            'text': weights['text'] * text_confidence
        }
        if has_domain:
            effective_weights['domain'] = weights['domain'] * domain_confidence

        # Sum of effective weights (for normalization)
        total_effective_weight = sum(effective_weights.values())

        # Degenerate case: every participating module reported zero
        # confidence. Note has_domain is necessarily False here — a positive
        # domain confidence would have made the total nonzero — so the
        # previous dead "add domain entries" branch has been removed.
        if total_effective_weight == 0:
            breakdown = {
                'final_score': 0.0,
                'component_scores': {
                    'universal': 0.0,
                    'personality': 0.0,
                    'text': 0.0
                },
                'confidences': {
                    'universal': 0.0,
                    'personality': 0.0,
                    'text': 0.0
                },
                'effective_weights': effective_weights,
                'has_domain': has_domain
            }
            return 0.0, breakdown

        # Confidence-weighted average of the module scores
        fused_score = (
            effective_weights['universal'] * universal_score +
            effective_weights['personality'] * personality_score +
            effective_weights['text'] * text_score
        )
        if has_domain:
            fused_score += effective_weights['domain'] * domain_score

        fused_score /= total_effective_weight

        # Prepare breakdown for reporting/debugging
        breakdown = {
            'final_score': round(fused_score, 4),
            'component_scores': {
                'universal': round(universal_score, 4),
                'personality': round(personality_score, 4),
                'text': round(text_score, 4)
            },
            'confidences': {
                'universal': round(universal_confidence, 4),
                'personality': round(personality_confidence, 4),
                'text': round(text_confidence, 4)
            },
            'effective_weights': {
                k: round(v / total_effective_weight, 4)
                for k, v in effective_weights.items()
            },
            'base_weights': weights,
            'has_domain': has_domain
        }

        if has_domain:
            breakdown['component_scores']['domain'] = round(domain_score, 4)
            breakdown['confidences']['domain'] = round(domain_confidence, 4)

        return fused_score, breakdown

    def get_grade(self, final_score: float) -> str:
        """Convert a 0-1 score to a letter grade (A+ down to D)."""
        if final_score >= 0.9:
            return 'A+'
        elif final_score >= 0.85:
            return 'A'
        elif final_score >= 0.8:
            return 'A-'
        elif final_score >= 0.75:
            return 'B+'
        elif final_score >= 0.7:
            return 'B'
        elif final_score >= 0.65:
            return 'B-'
        elif final_score >= 0.6:
            return 'C+'
        elif final_score >= 0.55:
            return 'C'
        elif final_score >= 0.5:
            return 'C-'
        else:
            return 'D'

    def get_percentile(self, final_score: float) -> int:
        """Estimate percentile (mock for MVP; capped at 99).

        In production this would query the actual score distribution.
        """
        return min(int(final_score * 100), 99)
services/personality_module.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Personality Module - Big Five Trait Scoring"""
2
+ import numpy as np
3
+ from typing import Dict, Tuple
4
+
5
class PersonalityModule:
    """Score a student's Big Five personality profile from Likert responses.

    Twenty questions (p_q1..p_q20, values 1-5) map onto five traits; each
    trait has three normally-scored items and one reverse-scored item.
    """

    def __init__(self):
        # Question ids per trait; the *_r lists hold reverse-scored items
        # whose answers must be inverted before normalisation.
        self.trait_mapping = {
            'openness': ['p_q1', 'p_q3', 'p_q4'],
            'openness_r': ['p_q2'],  # Reversed
            'conscientiousness': ['p_q5', 'p_q7', 'p_q8'],
            'conscientiousness_r': ['p_q6'],  # Reversed
            'extraversion': ['p_q9', 'p_q11', 'p_q12'],
            'extraversion_r': ['p_q10'],  # Reversed
            'agreeableness': ['p_q13', 'p_q15', 'p_q16'],
            'agreeableness_r': ['p_q14'],  # Reversed
            'stability': ['p_q17', 'p_q19', 'p_q20'],
            'stability_r': ['p_q18']  # Reversed
        }

        # Employability weighting per trait (sums to 1.0);
        # conscientiousness carries the largest share.
        self.trait_weights = {
            'openness': 0.20,
            'conscientiousness': 0.30,
            'extraversion': 0.20,
            'agreeableness': 0.15,
            'stability': 0.15
        }

    def score(self, responses: Dict[str, int]) -> Tuple[float, float, Dict]:
        """
        Calculate personality score from 20 questions
        Returns: (score, confidence, trait_scores)

        Each answered item is normalised from the 1-5 Likert scale to
        [0, 1] (reversed items inverted first); a trait with no answered
        items falls back to a neutral 0.5.
        """
        trait_scores = {}

        for trait_name in ('openness', 'conscientiousness', 'extraversion',
                           'agreeableness', 'stability'):
            direct_items = self.trait_mapping[trait_name]
            reverse_items = self.trait_mapping[f'{trait_name}_r']

            # Normalise 1-5 to 0-1; higher is better for direct items.
            normalised = [
                (responses[q] - 1) / 4.0
                for q in direct_items if q in responses
            ]
            # Reversed items: lower raw answer = better, so invert first.
            normalised += [
                (5 - responses[q]) / 4.0
                for q in reverse_items if q in responses
            ]

            trait_scores[trait_name] = np.mean(normalised) if normalised else 0.5

        # Weighted blend of the five trait scores.
        overall = sum(
            trait_scores[t] * self.trait_weights[t]
            for t in trait_scores
        )

        # Confidence = fraction of the 20 questions present.
        # NOTE(review): this counts *every* key in responses; assumes callers
        # pass only the p_q* keys -- confirm upstream.
        total_questions = 20
        confidence = len(responses) / total_questions

        return overall, confidence, trait_scores

    def explain(self, trait_scores: Dict) -> Dict:
        """Summarise the strongest and weakest traits for display."""
        ranked = sorted(trait_scores.items(), key=lambda item: item[1], reverse=True)

        def _entry(trait, value):
            # Shared payload shape for both lists.
            return {
                'trait': trait.capitalize(),
                'score': round(value, 2),
                'description': self._get_trait_description(trait, value)
            }

        explanations = {
            'top_strengths': [],
            'areas_for_growth': []
        }

        # Strongest two traits, reported only above the 0.6 bar.
        for trait, value in ranked[:2]:
            if value > 0.6:
                explanations['top_strengths'].append(_entry(trait, value))

        # Weakest two traits, flagged only below 0.5.
        for trait, value in ranked[-2:]:
            if value < 0.5:
                explanations['areas_for_growth'].append(_entry(trait, value))

        return explanations

    def _get_trait_description(self, trait: str, score: float) -> str:
        """Return a one-line description of *trait* at its high/low level."""
        descriptions = {
            'openness': {
                'high': "Highly creative, curious, and open to new experiences",
                'low': "Prefers routine and traditional approaches"
            },
            'conscientiousness': {
                'high': "Very organized, reliable, and goal-oriented",
                'low': "May benefit from improved organization and planning"
            },
            'extraversion': {
                'high': "Energetic, sociable, and thrives in team environments",
                'low': "Prefers independent work and smaller groups"
            },
            'agreeableness': {
                'high': "Cooperative, empathetic, and team-oriented",
                'low': "Independent thinker, comfortable with competition"
            },
            'stability': {
                'high': "Emotionally stable, handles stress well",
                'low': "May experience stress in high-pressure situations"
            }
        }

        # Scores above 0.6 read as the 'high' variant of the trait.
        level = 'high' if score > 0.6 else 'low'
        return descriptions.get(trait, {}).get(level, f"{trait} score: {score:.2f}")
services/student_output.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Student Output Service - Individual student JSON formatting
3
+ Provides structured analysis output for single students
4
+ """
5
+ import logging
6
+ from typing import Dict, List, Any, Optional
7
+ from datetime import datetime
8
+ from dataclasses import dataclass, asdict
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
@dataclass
class SkillRecommendation:
    """Skill gap recommendation for student"""
    # NOTE(review): StudentOutputService builds plain dicts with these same
    # keys instead of instantiating this dataclass -- confirm intended use.
    skill: str                      # skill name the student should acquire
    priority: str                   # e.g. 'high' (value used by _recommend_skills)
    recommended_courses: List[str]  # course titles that close the gap
    certifications: List[str]       # relevant certifications (may be empty)
20
+
21
+
22
@dataclass
class CareerPath:
    """Suggested career path"""
    # NOTE(review): StudentOutputService emits plain dicts with these same
    # keys rather than this dataclass -- confirm intended use.
    role: str                    # human-readable role title
    fit_score: float             # 0-1 fit against role requirements (capped at 1.0)
    requirements_met: List[str]  # required skills the student already has
    requirements_gap: List[str]  # required skills still missing
29
+
30
+
31
class StudentOutputService:
    """
    Formats individual student analysis into structured JSON
    """

    # Grade thresholds as (min_score, letter, description), scanned top-down
    # by _get_grade -- keep sorted in descending score order.
    # NOTE(review): this scale (A+ at 0.90, A at 0.80, ...) is coarser than
    # the fusion scorer's get_grade scale -- confirm they are meant to differ.
    GRADE_THRESHOLDS = [
        (0.90, 'A+', 'Outstanding'),
        (0.80, 'A', 'Excellent'),
        (0.70, 'B+', 'Very Good'),
        (0.60, 'B', 'Good'),
        (0.50, 'C', 'Average'),
        (0.40, 'D', 'Below Average'),
        (0.00, 'F', 'Needs Improvement')
    ]

    def __init__(self):
        # Role requirements mapping: 'required' skills drive the base fit
        # score; 'preferred' skills add a bonus (see _suggest_careers).
        self.career_requirements = {
            'software_engineer': {
                'required': ['python', 'sql', 'git', 'problem_solving'],
                'preferred': ['cloud', 'docker', 'system_design']
            },
            'data_scientist': {
                'required': ['python', 'sql', 'statistics', 'machine_learning'],
                'preferred': ['deep_learning', 'spark', 'mlops']
            },
            'product_manager': {
                'required': ['communication', 'leadership', 'analytics'],
                'preferred': ['sql', 'strategic_thinking', 'stakeholder_management']
            },
            'mechanical_engineer': {
                'required': ['cad', 'engineering_drawing', 'manufacturing'],
                'preferred': ['fea', 'cfd', 'automation']
            }
        }

    def format_student_output(self,
                              student_id: str,
                              score_packet: Dict[str, Any],
                              domain_analysis: Optional[Dict[str, Any]] = None,
                              raw_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Format comprehensive student analysis JSON

        Args:
            student_id: Student identifier
            score_packet: Output from scoring endpoint
            domain_analysis: Output from FCT (optional)
            raw_data: Original student data (optional)

        Returns:
            Structured student JSON
        """
        # Extract core scores
        final_score = score_packet.get('final_score', 0)
        grade, grade_desc = self._get_grade(final_score)

        # Component scores
        component_scores = score_packet.get('scores', {}).get('component_scores', {})
        confidences = score_packet.get('scores', {}).get('confidences', {})
        detailed_features = score_packet.get('detailed_features', {})

        # Domain info: prefer the FCT detection, fall back to the packet.
        detected_domain = (
            domain_analysis.get('detected_domain') if domain_analysis
            else score_packet.get('domain_type', 'general')
        )

        # Build output
        # NOTE(review): datetime.utcnow() is naive and deprecated since
        # Python 3.12; switching to datetime.now(timezone.utc) would change
        # the isoformat string (adds +00:00), so adjust the 'Z' suffix too.
        output = {
            'student_id': student_id,
            'generated_at': datetime.utcnow().isoformat() + 'Z',

            'summary': {
                'final_score': round(final_score, 3),
                'grade': grade,
                'grade_description': grade_desc,
                'percentile': score_packet.get('percentile', 50),
                # 0.60 is the placement-readiness cutoff (grade 'B' or better).
                'placement_ready': final_score >= 0.60
            },

            'scores': {
                'universal': {
                    'score': round(component_scores.get('universal', 0), 3),
                    'confidence': round(confidences.get('universal', 0), 3),
                    'features': detailed_features.get('universal', {})
                },
                'personality': {
                    'score': round(component_scores.get('personality', 0), 3),
                    'confidence': round(confidences.get('personality', 0), 3),
                    'traits': detailed_features.get('personality', {})
                },
                'text': {
                    'score': round(component_scores.get('text', 0), 3),
                    'confidence': round(confidences.get('text', 0), 3),
                    'aspects': detailed_features.get('text', {})
                }
            },

            'domain_analysis': self._format_domain_analysis(
                detected_domain, domain_analysis, raw_data
            ),

            'strengths': self._identify_strengths(detailed_features),

            'improvement_areas': self._identify_improvements(detailed_features),

            'career_suggestions': self._suggest_careers(
                detected_domain, detailed_features, raw_data
            ),

            'skill_recommendations': self._recommend_skills(
                detected_domain, raw_data
            ),

            'explanations': score_packet.get('explanations', {})
        }

        # Add fidelity if available
        if domain_analysis and 'fidelity' in domain_analysis:
            output['fidelity_assessment'] = domain_analysis['fidelity']

        return output

    def _get_grade(self, score: float) -> tuple:
        """Get grade and description for score.

        Returns the first (grade, description) pair whose threshold the
        score meets; the final return is a safety net for scores < 0.
        """
        for threshold, grade, desc in self.GRADE_THRESHOLDS:
            if score >= threshold:
                return (grade, desc)
        return ('F', 'Needs Improvement')

    def _format_domain_analysis(self, detected_domain: str,
                                domain_analysis: Optional[Dict],
                                raw_data: Optional[Dict]) -> Dict[str, Any]:
        """Format domain-specific analysis.

        Always returns the detected domain plus a display name; confidence,
        aspects and current skills are included only when available.
        """
        result = {
            'detected_domain': detected_domain,
            'display_name': detected_domain.replace('_', ' ').title()
        }

        if domain_analysis:
            result['domain_confidence'] = domain_analysis.get('domain_confidence', 0)
            result['aspects'] = domain_analysis.get('aspects', {})

        # Skill gaps from raw data; a comma-separated string is normalised
        # to a lowercase list.
        if raw_data and 'skills' in raw_data:
            skills = raw_data.get('skills', [])
            if isinstance(skills, str):
                skills = [s.strip().lower() for s in skills.split(',')]
            result['current_skills'] = skills

        return result

    def _identify_strengths(self, features: Dict) -> List[Dict]:
        """Identify top strengths from features.

        Scans universal, personality and text features against fixed
        thresholds and returns the five highest-scoring strengths.
        """
        strengths = []

        # Universal features
        universal = features.get('universal', {})
        if universal.get('cgpa_norm', 0) > 0.8:
            strengths.append({
                'area': 'Academic Excellence',
                'score': universal['cgpa_norm'],
                'description': 'Strong academic performance with high CGPA'
            })

        if universal.get('internship_exposure', 0) > 0.7:
            strengths.append({
                'area': 'Industry Experience',
                'score': universal['internship_exposure'],
                'description': 'Significant practical experience through internships'
            })

        # Personality traits: any trait above 0.75 counts as a strength.
        personality = features.get('personality', {})
        for trait, score in personality.items():
            if score > 0.75:
                strengths.append({
                    'area': trait.title(),
                    'score': score,
                    'description': self._get_trait_description(trait, 'high')
                })

        # Text aspects
        text = features.get('text', {})
        if text.get('leadership_score', 0) > 0.7:
            strengths.append({
                'area': 'Leadership',
                'score': text['leadership_score'],
                'description': 'Demonstrated leadership abilities with concrete examples'
            })

        if text.get('technical_skills', 0) > 0.7:
            strengths.append({
                'area': 'Technical Skills',
                'score': text['technical_skills'],
                'description': 'Strong technical competencies'
            })

        # Sort by score and return top 5
        strengths.sort(key=lambda x: x['score'], reverse=True)
        return strengths[:5]

    def _identify_improvements(self, features: Dict) -> List[Dict]:
        """Identify areas needing improvement.

        Low universal/text feature scores (below fixed cutoffs) become
        suggestions; the four weakest areas are returned, lowest first.
        """
        improvements = []

        # Universal features
        universal = features.get('universal', {})
        if universal.get('ec_quality', 0) < 0.4:
            improvements.append({
                'area': 'Extracurricular Activities',
                'current_score': universal.get('ec_quality', 0),
                'suggestion': 'Join clubs, participate in competitions, or take leadership roles'
            })

        if universal.get('cert_quality', 0) < 0.4:
            improvements.append({
                'area': 'Professional Certifications',
                'current_score': universal.get('cert_quality', 0),
                'suggestion': 'Pursue industry-recognized certifications in your domain'
            })

        # Text aspects
        text = features.get('text', {})
        if text.get('communication', 0) < 0.5:
            improvements.append({
                'area': 'Communication Skills',
                'current_score': text.get('communication', 0),
                'suggestion': 'Practice public speaking, write detailed project documentation'
            })

        if text.get('career_alignment', 0) < 0.5:
            improvements.append({
                'area': 'Career Clarity',
                'current_score': text.get('career_alignment', 0),
                'suggestion': 'Define clear short-term and long-term career goals'
            })

        # Sort by score (lowest first)
        improvements.sort(key=lambda x: x['current_score'])
        return improvements[:4]

    def _suggest_careers(self, domain: str, features: Dict,
                         raw_data: Optional[Dict]) -> List[Dict]:
        """Suggest career paths based on profile.

        Fit score = fraction of required skills met, plus up to a 0.3
        bonus for preferred skills, capped at 1.0. Roles scoring above
        0.3 are returned, best three first.
        """
        suggestions = []

        # Get student skills (comma-string or list, lower-cased either way).
        skills = []
        if raw_data and 'skills' in raw_data:
            skills_raw = raw_data.get('skills', [])
            if isinstance(skills_raw, str):
                skills = [s.strip().lower() for s in skills_raw.split(',')]
            else:
                skills = [s.lower() for s in skills_raw]

        # Text features for soft skills
        text = features.get('text', {})

        for role, reqs in self.career_requirements.items():
            # Calculate fit score: a requirement counts if it is a listed
            # skill OR a soft skill evidenced by the text analysis.
            required_met = sum(1 for r in reqs['required']
                               if r in skills or self._has_soft_skill(r, text))
            preferred_met = sum(1 for p in reqs['preferred']
                                if p in skills or self._has_soft_skill(p, text))

            total_reqs = len(reqs['required'])
            fit_score = (required_met / total_reqs) if total_reqs else 0
            fit_score += (preferred_met / len(reqs['preferred'])) * 0.3 if reqs['preferred'] else 0
            fit_score = min(fit_score, 1.0)

            if fit_score > 0.3:  # Minimum threshold
                suggestions.append({
                    'role': role.replace('_', ' ').title(),
                    'fit_score': round(fit_score, 2),
                    'requirements_met': [r for r in reqs['required']
                                         if r in skills or self._has_soft_skill(r, text)],
                    'requirements_gap': [r for r in reqs['required']
                                         if r not in skills and not self._has_soft_skill(r, text)]
                })

        # Sort by fit score
        suggestions.sort(key=lambda x: x['fit_score'], reverse=True)
        return suggestions[:3]

    def _has_soft_skill(self, skill: str, text_features: Dict) -> bool:
        """Check if student has a soft skill based on text analysis.

        Maps requirement names onto text-feature keys; a feature value
        above 0.6 counts as having the skill. Unknown names return False.
        """
        skill_mapping = {
            'communication': 'communication',
            'leadership': 'leadership_score',
            'problem_solving': 'problem_solving',
            'teamwork': 'teamwork'
        }

        if skill in skill_mapping:
            return text_features.get(skill_mapping[skill], 0) > 0.6
        return False

    def _recommend_skills(self, domain: str, raw_data: Optional[Dict]) -> List[Dict]:
        """Recommend skills to acquire.

        Suggests up to four domain skills the student does not already
        list, each with course recommendations.
        """
        recommendations = []

        # Domain-specific recommendations.
        # NOTE(review): these keys ('software_engineering', ...) do not match
        # the career_requirements keys ('software_engineer', ...) -- confirm
        # which naming the `domain` argument actually uses; unknown domains
        # fall back to the software_engineering list below.
        domain_skills = {
            'software_engineering': [
                {'skill': 'cloud', 'courses': ['AWS Solutions Architect', 'GCP Fundamentals']},
                {'skill': 'system_design', 'courses': ['Grokking System Design']},
                {'skill': 'devops', 'courses': ['Docker Mastery', 'Kubernetes']}
            ],
            'data_science': [
                {'skill': 'deep_learning', 'courses': ['Deep Learning Specialization']},
                {'skill': 'mlops', 'courses': ['MLOps for Production']},
                {'skill': 'statistics', 'courses': ['Statistics with Python']}
            ],
            'mechanical_engineering': [
                {'skill': 'ev_powertrain', 'courses': ['Electric Vehicle Technology']},
                {'skill': 'automation', 'courses': ['Industrial Automation', 'PLC']}
            ]
        }

        # Get current skills.
        # NOTE(review): unlike _suggest_careers, a list-valued 'skills'
        # field is ignored here (only the comma-string form is parsed) --
        # confirm whether list input should also be handled.
        current_skills = []
        if raw_data and 'skills' in raw_data:
            skills_raw = raw_data.get('skills', [])
            if isinstance(skills_raw, str):
                current_skills = [s.strip().lower() for s in skills_raw.split(',')]

        # Recommend missing skills
        domain_recs = domain_skills.get(domain, domain_skills.get('software_engineering', []))

        for rec in domain_recs:
            if rec['skill'] not in current_skills:
                recommendations.append({
                    'skill': rec['skill'].replace('_', ' ').title(),
                    'priority': 'high',
                    'recommended_courses': rec['courses'],
                    'certifications': []
                })

        return recommendations[:4]

    def _get_trait_description(self, trait: str, level: str) -> str:
        """Get description for personality trait.

        `level` is 'high' or 'low'; unknown traits get a generic fallback.
        """
        descriptions = {
            'openness': {
                'high': 'Creative, curious, and open to new experiences',
                'low': 'Practical and focused on concrete tasks'
            },
            'conscientiousness': {
                'high': 'Organized, disciplined, and reliable',
                'low': 'Flexible and adaptable to changing situations'
            },
            'extraversion': {
                'high': 'Energetic, sociable, and thrives in team settings',
                'low': 'Focused, reflective, and excels in independent work'
            },
            'agreeableness': {
                'high': 'Cooperative, empathetic, and team-oriented',
                'low': 'Independent thinker, comfortable with competition'
            },
            'stability': {
                'high': 'Emotionally resilient and handles stress well',
                'low': 'Sensitive and responsive to feedback'
            }
        }

        return descriptions.get(trait, {}).get(level, f"Strong {trait}")
400
+
401
+
402
# Singleton
_student_output_service: Optional[StudentOutputService] = None


def get_student_output_service() -> StudentOutputService:
    """Return the lazily-created, process-wide StudentOutputService."""
    global _student_output_service
    service = _student_output_service
    if service is None:
        # First call: build and cache the shared instance.
        service = StudentOutputService()
        _student_output_service = service
    return service
411
+ return _student_output_service
services/text_module.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text Embeddings Module - NLP-based Scoring"""
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer
4
+ from typing import Dict, Tuple
5
+ import re
6
+
7
class TextModule:
    """Score free-text answers with SBERT embeddings plus simple heuristics.

    Three responses are expected: strengths (text_q1), career interests
    (text_q2) and extracurriculars (text_q3).
    """

    def __init__(self):
        # Sentence-BERT encoder used for semantic similarity.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

        # Pre-encoded "ideal answer" prototypes per question theme.
        self.reference_embeddings = {
            'strengths': self.model.encode([
                "I have strong technical skills in programming, problem-solving, and software development",
                "My strengths include leadership, communication, and analytical thinking",
                "I excel at teamwork, project management, and innovative solutions"
            ]),
            'career': self.model.encode([
                "I am interested in software engineering and technology innovation",
                "I want to work in data science and machine learning",
                "My goal is to become a product manager and lead technical teams"
            ])
        }

        # Substrings that signal leadership experience.
        self.leadership_keywords = [
            'lead', 'leader', 'leadership', 'managed', 'organized', 'president',
            'head', 'coordinator', 'captain', 'founded', 'initiated', 'directed'
        ]

    def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
        """
        Calculate text score from 3 textual responses
        Returns: (score, confidence, features)
        """
        answer_strengths = text_responses.get('text_q1', '')
        answer_career = text_responses.get('text_q2', '')
        answer_extras = text_responses.get('text_q3', '')

        features = {
            # text_q1 (strengths): surface writing quality.
            'writing_quality': self._assess_writing_quality(answer_strengths),
            # text_q2 (career interests): semantic coherence vs prototypes.
            'intent_coherence': self._assess_intent_coherence(answer_career),
            # text_q3 (extracurriculars): leadership keyword evidence.
            'leadership_score': self._assess_leadership(answer_extras),
            # All three answers: overall volume/depth.
            'content_depth': self._assess_content_depth(
                answer_strengths, answer_career, answer_extras),
        }

        # Weighted blend; leadership carries the largest share (0.30).
        overall = (
            features['writing_quality'] * 0.25 +
            features['intent_coherence'] * 0.25 +
            features['leadership_score'] * 0.30 +
            features['content_depth'] * 0.20
        )

        confidence = self._calculate_confidence(
            answer_strengths, answer_career, answer_extras)

        return overall, confidence, features

    def _assess_writing_quality(self, text: str) -> float:
        """Heuristic 0-1 writing-quality score for a single response."""
        if not text or len(text) < 50:
            return 0.2  # too short to judge

        quality = 0.5  # base score for any non-trivial answer

        # Length band: 150-300 words is ideal.
        word_count = len(text.split())
        if 150 <= word_count <= 300:
            quality += 0.3
        elif 100 <= word_count < 150 or 300 < word_count <= 400:
            quality += 0.2
        else:
            quality += 0.1

        # Several sentences suggest structured writing.
        if len(re.split(r'[.!?]+', text)) >= 5:
            quality += 0.1

        # Starts with a capital letter.
        if text[0].isupper():
            quality += 0.05

        # Vocabulary variety: reward low repetition.
        tokens = text.lower().split()
        if tokens and len(set(tokens)) / len(tokens) > 0.6:
            quality += 0.05

        return min(quality, 1.0)

    def _assess_intent_coherence(self, text: str) -> float:
        """Similarity of the career answer to prototype career statements."""
        if not text or len(text) < 50:
            return 0.2

        embedding = self.model.encode([text])[0]
        response_norm = np.linalg.norm(embedding)

        # Cosine similarity against every 'career' prototype.
        similarities = [
            np.dot(embedding, proto) / (response_norm * np.linalg.norm(proto))
            for proto in self.reference_embeddings['career']
        ]

        best = max(similarities) if similarities else 0

        # Map cosine range [-1, 1] onto [0, 1].
        return (best + 1) / 2

    def _assess_leadership(self, text: str) -> float:
        """Keyword-based leadership score for the extracurricular answer."""
        if not text or len(text) < 50:
            return 0.2

        lowered = text.lower()

        # Substring hits across the leadership vocabulary.
        hits = sum(keyword in lowered for keyword in self.leadership_keywords)

        # Three or more hits saturate the score; zero still earns 0.3.
        if hits >= 3:
            result = 1.0
        elif hits == 2:
            result = 0.8
        elif hits == 1:
            result = 0.6
        else:
            result = 0.3

        # Explicit team-lead phrasing gets a small capped bonus.
        if 'led a team' in lowered or 'team lead' in lowered:
            result = min(result + 0.1, 1.0)

        return result

    def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Score combined volume of all three answers (450+ words = 1.0)."""
        total_words = sum(len(part.split()) for part in (text_q1, text_q2, text_q3))

        # Step function over total word count; 450 = 150+ words each.
        for floor, value in ((450, 1.0), (300, 0.8), (200, 0.6), (100, 0.4)):
            if total_words >= floor:
                return value
        return 0.2

    def _calculate_confidence(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Average completeness (0-1) of the three answers, judged by length."""
        def completeness(answer: str) -> float:
            # Empty -> 0, short -> 0.3, medium -> 0.6, full -> 1.0.
            if not answer:
                return 0
            if len(answer) < 50:
                return 0.3
            if len(answer) < 100:
                return 0.6
            return 1.0

        return np.mean([completeness(answer) for answer in (text_q1, text_q2, text_q3)])

    def explain(self, features: Dict) -> Dict:
        """Turn feature scores into human-readable highlights and suggestions."""
        highlights = []
        suggestions = []

        # Highlights: features comfortably above 0.7.
        if features.get('writing_quality', 0) > 0.7:
            highlights.append("Strong writing quality with clear communication")
        if features.get('leadership_score', 0) > 0.7:
            highlights.append("Demonstrated leadership experience and initiative")
        if features.get('intent_coherence', 0) > 0.7:
            highlights.append("Clear and coherent career goals")

        # Suggestions: features below 0.5.
        if features.get('writing_quality', 0) < 0.5:
            suggestions.append("Provide more detailed responses (aim for 150-300 words each)")
        if features.get('leadership_score', 0) < 0.5:
            suggestions.append("Highlight specific leadership roles and their impact")
        if features.get('content_depth', 0) < 0.5:
            suggestions.append("Include more specific examples and achievements")

        return {'highlights': highlights, 'suggestions': suggestions}
services/text_module_v2.py ADDED
@@ -0,0 +1,576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text Embeddings Module V2 - Aspect-based Prototype Extraction"""
2
+ import os
3
+ import json
4
+ import logging
5
+ import numpy as np
6
+ from datetime import datetime
7
+ from typing import Dict, Tuple, List, Optional
8
+ from sentence_transformers import SentenceTransformer
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Default aspect seeds (built-in fallback)
13
+ DEFAULT_ASPECT_SEEDS = {
14
+ "leadership": [
15
+ "led a team", "was team lead", "managed a project", "supervised interns",
16
+ "coordinated a cross-functional team", "organized the club", "president of the society",
17
+ "captain of the team", "ran weekly standups", "delegated tasks", "mentored junior members",
18
+ "headed the project", "oversaw project timelines", "chaired the committee",
19
+ "led end-to-end delivery", "directed project milestones", "led a 5-person team",
20
+ "managed stakeholders", "took ownership of the initiative", "led code reviews",
21
+ "organized campus events", "led product demo sessions", "led recruitment for volunteers",
22
+ "managed vendor relationships", "spearheaded the outreach program"
23
+ ],
24
+ "technical_skills": [
25
+ "developed a web API", "implemented RESTful services", "coded in python",
26
+ "built machine learning models", "trained neural networks", "implemented data pipelines",
27
+ "used pandas for ETL", "designed database schemas", "built microservices",
28
+ "deployed models using docker", "worked with FastAPI", "implemented CI/CD",
29
+ "wrote unit tests", "optimized SQL queries", "used scikit-learn",
30
+ "developed recommendation systems", "built feature engineering pipelines",
31
+ "deployed to cloud", "developed ETL jobs", "worked with Kafka",
32
+ "implemented caching layers", "used TensorFlow or PyTorch", "built backend services",
33
+ "wrote production-grade code", "integrated third-party APIs"
34
+ ],
35
+ "problem_solving": [
36
+ "solved complex problem", "debugged production issues", "optimized an algorithm",
37
+ "reduced latency of service", "designed a scalable solution", "investigated root cause",
38
+ "improved system reliability", "created a novel solution", "troubleshot integration issues",
39
+ "automated manual tasks", "reduced memory usage", "resolved data pipeline failures",
40
+ "refactored critical code", "handled edge cases", "iterated on prototypes",
41
+ "performed A/B testing to decide", "diagnosed performance bottlenecks",
42
+ "designed fallback strategies", "resolved deployment failures", "created monitoring & alerts"
43
+ ],
44
+ "internships_experience": [
45
+ "summer internship", "industrial training", "interned at", "worked as an intern",
46
+ "internship project", "internship in data science", "interned at a startup",
47
+ "completed internship at", "interned with the engineering team", "intern experience",
48
+ "interned at an e-commerce company", "industrial internship", "co-op placement",
49
+ "paid internship", "research internship", "interned as a software engineer",
50
+ "on-the-job training", "worked under mentor", "internship-driven project",
51
+ "corporate internship"
52
+ ],
53
+ "communication": [
54
+ "presented to stakeholders", "gave a presentation", "wrote documentation",
55
+ "authored reports", "explained results to non-technical", "public speaking",
56
+ "delivered demo", "prepared slides", "wrote user guides", "communicated with clients",
57
+ "collaborated across teams", "conducted knowledge transfer", "wrote clear emails",
58
+ "explained technical concepts", "presented project outcomes", "led demo sessions",
59
+ "created onboarding docs", "contributed to team discussions", "led workshops",
60
+ "hosted training sessions"
61
+ ],
62
+ "teamwork": [
63
+ "collaborated with team", "worked in a cross-functional team", "paired programming",
64
+ "contributed to group project", "supported teammates", "collaborated on design",
65
+ "worked with designers and PMs", "helped teammates debug", "co-authored project",
66
+ "mentored peers", "shared responsibilities", "worked effectively in group",
67
+ "contributed in agile team", "participated in sprints", "assisted in integration"
68
+ ],
69
+ "project_execution": [
70
+ "delivered project on time", "met project deadlines", "managed milestones",
71
+ "handled project planning", "released production features", "coordinated deployment",
72
+ "delivered MVP", "tracked KPIs", "managed scope", "created project timeline",
73
+ "ran retrospectives", "managed feature rollout", "ensured on-time delivery",
74
+ "performed release validations", "deployed analytics dashboard", "iterated based on feedback"
75
+ ],
76
+ "initiative": [
77
+ "initiated a project", "proposed a new idea", "took initiative", "started a side project",
78
+ "built a proof of concept", "started a campus chapter", "created an automation",
79
+ "improved an existing process", "volunteered to lead", "identified improvement areas",
80
+ "launched a mini-product", "ran a pilot program", "created onboarding scripts",
81
+ "led process improvements", "started a mentoring circle"
82
+ ],
83
+ "learning_agility": [
84
+ "quick learner", "self-taught", "learned new framework", "picked up new language",
85
+ "adapted to new tech", "completed online courses", "upskilled via projects",
86
+ "transitioned domains", "learned on the job", "rapidly onboarded", "attended workshops",
87
+ "completed bootcamp", "took certification courses", "learned through documentation",
88
+ "scaled knowledge quickly", "adapted to changing scope"
89
+ ],
90
+ "career_alignment": [
91
+ "career goal is", "aspire to become", "interested in data science",
92
+ "pursue a role in product", "long-term goal", "want to specialize in",
93
+ "career objective", "planning to pursue masters", "aim to work in industry",
94
+ "seek product management roles", "interested in research", "want to join a startup",
95
+ "targeting roles in ML engineering", "aiming for consulting roles",
96
+ "career path is focused on"
97
+ ]
98
+ }
99
+
100
# Question to aspects mapping
# Maps each free-text question id (text_q1..text_q3) to the aspect keys that
# are scored for that answer in TextModuleV2.score(). Keys must exist in the
# loaded seed dictionary to produce a centroid; missing keys are silently
# dropped when the per-question centroid subsets are built.
# NOTE(review): "leadership" (text_q3) is not among the defaults visible in
# this chunk — presumably defined earlier in DEFAULT_ASPECT_SEEDS; confirm.
QUESTION_ASPECT_MAP = {
    "text_q1": ["technical_skills", "problem_solving", "learning_agility", "initiative", "communication"],
    "text_q2": ["career_alignment", "learning_agility", "initiative", "communication"],
    "text_q3": ["leadership", "teamwork", "project_execution", "internships_experience", "communication"]
}
106
+
107
+
108
class TextModuleV2:
    """Enhanced text scoring using aspect-based prototypes with all-mpnet-base-v2.

    Free-text answers are split into chunks, embedded with a
    SentenceTransformer, and scored by cosine similarity against per-aspect
    centroid vectors built from seed phrases. Centroids are cached on disk
    (.npz) and seeds can be hot-swapped via update_aspect_seeds().
    """

    def __init__(self, model_name: str = None, seeds_path: str = "./aspect_seeds.json",
                 centroids_path: str = "./aspect_centroids.npz"):
        # Config: allow model override via env or param
        self.model_name = model_name or os.getenv('ASPECT_MODEL_NAME', 'all-mpnet-base-v2')
        self.seeds_path = seeds_path  # JSON file: {aspect_key: [seed phrase, ...]}
        self.centroids_path = centroids_path  # .npz cache: {aspect_key: unit vector}

        # Load model (CPU-pinned; encoding below uses convert_to_tensor=False)
        logger.info(f"Loading sentence transformer model: {self.model_name}")
        self.model = SentenceTransformer(self.model_name, device='cpu')

        # Load seeds
        self.aspect_seeds = self._load_seeds()

        # Load or build centroids
        self.centroids = self._load_or_build_centroids()

        logger.info(f"TextModuleV2 initialized with {len(self.aspect_seeds)} aspects")

    def _load_seeds(self) -> Dict[str, List[str]]:
        """Load aspect seeds from JSON or use defaults.

        Any failure (missing file, bad JSON) falls back to
        DEFAULT_ASPECT_SEEDS rather than raising.
        """
        if os.path.exists(self.seeds_path):
            try:
                with open(self.seeds_path, 'r', encoding='utf-8') as f:
                    seeds = json.load(f)
                logger.info(f"Loaded aspect seeds from {self.seeds_path}")
                return seeds
            except Exception as e:
                logger.warning(f"Failed to load seeds from {self.seeds_path}: {e}. Using defaults.")
        # NOTE(review): shallow copy — the inner seed lists are still shared
        # with the module-level default dict; mutating them in place would
        # leak into DEFAULT_ASPECT_SEEDS.
        return DEFAULT_ASPECT_SEEDS.copy()

    def _load_or_build_centroids(self) -> Dict[str, np.ndarray]:
        """Load cached centroids or build from seeds.

        A corrupt/unreadable cache logs a warning and triggers a rebuild
        (which also rewrites the cache file).
        """
        if os.path.exists(self.centroids_path):
            try:
                data = np.load(self.centroids_path)
                centroids = {key: data[key] for key in data.files}
                logger.info(f"Loaded centroids from {self.centroids_path}")
                return centroids
            except Exception as e:
                logger.warning(f"Failed to load centroids: {e}. Rebuilding.")

        return self.build_prototypes(self.aspect_seeds, self.model)

    def build_prototypes(self, aspect_seeds: Dict[str, List[str]],
                         model: SentenceTransformer) -> Dict[str, np.ndarray]:
        """Build centroid prototypes from seed phrases.

        Each centroid is the L2-normalized mean of the seed embeddings.
        The result is persisted to self.centroids_path (best effort —
        a save failure is logged, not raised).
        """
        logger.info("Building aspect centroids...")
        centroids = {}

        for aspect, seeds in aspect_seeds.items():
            if not seeds:
                logger.warning(f"Aspect '{aspect}' has no seeds, skipping")
                continue

            # Encode seeds (CPU, convert_to_tensor=False)
            embeddings = model.encode(seeds, convert_to_tensor=False, show_progress_bar=False)
            embeddings = np.array(embeddings, dtype=np.float32)

            # Compute centroid
            centroid = np.mean(embeddings, axis=0)
            centroid = centroid / np.linalg.norm(centroid)  # Normalize to unit length
            centroids[aspect] = centroid

        # Save centroids
        try:
            np.savez(self.centroids_path, **centroids)
            logger.info(f"Saved centroids to {self.centroids_path}")
        except Exception as e:
            logger.error(f"Failed to save centroids: {e}")

        return centroids

    def score_text_aspects(self, text: str, centroids: Dict[str, np.ndarray],
                           top_k: int = 3) -> Tuple[Dict[str, float], Dict[str, List[str]], float]:
        """
        Score text against aspect centroids
        Returns: (aspect_scores, chunk_assignments, confidence)

        aspect_scores: {aspect: score in [0, 1]}
        chunk_assignments: {aspect: [chunks whose raw cosine sim > 0.3]}
        confidence: overall confidence in [0, 1] from _calculate_aspect_confidence
        Texts shorter than 20 chars yield ({}, {}, 0.0).
        """
        if not text or len(text) < 20:
            return {}, {}, 0.0

        # Split into chunks (sentences or 50-word windows)
        chunks = self._split_text(text)
        if not chunks:
            return {}, {}, 0.0

        # Encode chunks
        chunk_embeddings = self.model.encode(chunks, convert_to_tensor=False, show_progress_bar=False)
        chunk_embeddings = np.array(chunk_embeddings, dtype=np.float32)

        # Score each aspect
        aspect_scores = {}
        chunk_assignments = {aspect: [] for aspect in centroids.keys()}

        for aspect, centroid in centroids.items():
            # Compute cosine similarities (epsilon guards zero-norm vectors)
            sims = np.dot(chunk_embeddings, centroid) / (
                np.linalg.norm(chunk_embeddings, axis=1) * np.linalg.norm(centroid) + 1e-8
            )

            # Scoring formula: 0.6 * max_sim + 0.4 * mean_topk
            max_sim = np.max(sims)
            # partition puts the top-k sims at the tail; the [-top_k:] slice
            # simply takes the whole array when there are fewer than top_k chunks
            topk_sims = np.partition(sims, -min(top_k, len(sims)))[-top_k:]
            mean_topk = np.mean(topk_sims)

            # Map from [-1,1] to [0,1]
            raw_score = 0.6 * max_sim + 0.4 * mean_topk
            normalized_score = (raw_score + 1) / 2
            aspect_scores[aspect] = float(np.clip(normalized_score, 0, 1))

            # Assign chunks with sim > threshold (threshold is on the RAW
            # cosine similarity, not the normalized score)
            threshold = 0.3
            for i, sim in enumerate(sims):
                if sim > threshold:
                    chunk_assignments[aspect].append(chunks[i])

        # Calculate confidence
        confidence = self._calculate_aspect_confidence(text, aspect_scores)

        return aspect_scores, chunk_assignments, confidence

    def _split_text(self, text: str) -> List[str]:
        """Split text into scorable chunks.

        Sentence split first; if fewer than 3 sentences survive the
        20-char minimum, falls back to 50-word windows with 25-word step.
        At most 20 chunks are returned.
        """
        import re
        # Split by sentences
        sentences = re.split(r'[.!?]+', text)
        chunks = [s.strip() for s in sentences if len(s.strip()) > 20]

        # If too few sentences, use sliding window
        if len(chunks) < 3:
            words = text.split()
            window_size = 50
            step = 25
            chunks = []
            # max(1, ...) guarantees at least one window for short texts
            for i in range(0, max(1, len(words) - window_size + 1), step):
                chunk = ' '.join(words[i:i+window_size])
                if len(chunk) > 20:
                    chunks.append(chunk)

        return chunks[:20]  # Limit to 20 chunks

    def _calculate_aspect_confidence(self, text: str, aspect_scores: Dict[str, float]) -> float:
        """Calculate confidence based on text quality and score distribution.

        Blend: 0.4 * length (saturates at 150 words)
             + 0.3 * score spread (higher std across aspects = clearer signal)
             + 0.3 * best aspect score.
        """
        if not aspect_scores:
            return 0.0

        # Text length factor
        word_count = len(text.split())
        length_factor = min(word_count / 150, 1.0)

        # Score variance factor (higher variance = more confident signal)
        scores = list(aspect_scores.values())
        score_std = np.std(scores)
        variance_factor = min(score_std * 2, 1.0)

        # Max score factor
        max_score = max(scores)

        confidence = 0.4 * length_factor + 0.3 * variance_factor + 0.3 * max_score
        return float(np.clip(confidence, 0, 1))

    def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
        """
        Main scoring function - backward compatible interface
        Returns: (score, confidence, features)

        Scores each question against its QUESTION_ASPECT_MAP aspects, then
        aggregates into named features. Missing aspect scores default to 0.3.
        Note: 'writing_quality' and 'internships_experience' are returned in
        features but are NOT part of the weighted text_score below.
        """
        text_q1 = text_responses.get('text_q1', '')
        text_q2 = text_responses.get('text_q2', '')
        text_q3 = text_responses.get('text_q3', '')

        # Score each question with relevant aspects
        q1_aspects = QUESTION_ASPECT_MAP['text_q1']
        q2_aspects = QUESTION_ASPECT_MAP['text_q2']
        q3_aspects = QUESTION_ASPECT_MAP['text_q3']

        # Restrict centroids to the aspects relevant to each question;
        # aspects without a centroid are silently skipped
        q1_centroids = {k: self.centroids[k] for k in q1_aspects if k in self.centroids}
        q2_centroids = {k: self.centroids[k] for k in q2_aspects if k in self.centroids}
        q3_centroids = {k: self.centroids[k] for k in q3_aspects if k in self.centroids}

        q1_scores, _, q1_conf = self.score_text_aspects(text_q1, q1_centroids)
        q2_scores, _, q2_conf = self.score_text_aspects(text_q2, q2_centroids)
        q3_scores, _, q3_conf = self.score_text_aspects(text_q3, q3_centroids)

        # Aggregate features
        features = {}

        # Technical skills from Q1
        features['technical_skills'] = q1_scores.get('technical_skills', 0.3)
        features['problem_solving'] = q1_scores.get('problem_solving', 0.3)

        # Career alignment from Q2
        features['career_alignment'] = q2_scores.get('career_alignment', 0.3)
        features['learning_agility'] = max(
            q1_scores.get('learning_agility', 0.3),
            q2_scores.get('learning_agility', 0.3)
        )

        # Leadership from Q3
        features['leadership_score'] = q3_scores.get('leadership', 0.3)
        features['teamwork'] = q3_scores.get('teamwork', 0.3)
        features['internships_experience'] = q3_scores.get('internships_experience', 0.3)

        # Communication (averaged across all)
        comm_scores = [
            q1_scores.get('communication', 0.3),
            q2_scores.get('communication', 0.3),
            q3_scores.get('communication', 0.3)
        ]
        # NOTE(review): np.mean returns np.float64, not a builtin float —
        # confirm downstream serialization (e.g. jsonify) accepts it
        features['communication'] = np.mean(comm_scores)

        # Writing quality (heuristic)
        features['writing_quality'] = self._assess_writing_quality(text_q1)

        # Content depth
        features['content_depth'] = self._assess_content_depth(text_q1, text_q2, text_q3)

        # Calculate overall score (weighted combination; weights sum to 1.0)
        text_score = (
            features['technical_skills'] * 0.15 +
            features['problem_solving'] * 0.10 +
            features['leadership_score'] * 0.20 +
            features['career_alignment'] * 0.10 +
            features['communication'] * 0.15 +
            features['teamwork'] * 0.10 +
            features['learning_agility'] * 0.10 +
            features['content_depth'] * 0.10
        )

        # Overall confidence
        confidence = np.mean([q1_conf, q2_conf, q3_conf])

        return text_score, confidence, features

    def _assess_writing_quality(self, text: str) -> float:
        """Heuristic writing quality assessment.

        Base 0.5 plus bonuses for word count in the 150-300 sweet spot,
        sentence count, leading capital, and vocabulary diversity.
        Capped at 1.0; texts under 50 chars score a flat 0.2.
        """
        if not text or len(text) < 50:
            return 0.2

        score = 0.5
        word_count = len(text.split())

        if 150 <= word_count <= 300:
            score += 0.3
        elif 100 <= word_count < 150 or 300 < word_count <= 400:
            score += 0.2
        else:
            score += 0.1

        import re
        sentences = re.split(r'[.!?]+', text)
        if len(sentences) >= 5:
            score += 0.1

        if text[0].isupper():
            score += 0.05

        # Vocabulary diversity: unique-word ratio
        words = text.lower().split()
        unique_ratio = len(set(words)) / len(words) if words else 0
        if unique_ratio > 0.6:
            score += 0.05

        return min(score, 1.0)

    def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Assess content depth from the combined word count of all answers."""
        total_words = len(text_q1.split()) + len(text_q2.split()) + len(text_q3.split())

        if total_words >= 450:
            return 1.0
        elif total_words >= 300:
            return 0.8
        elif total_words >= 200:
            return 0.6
        elif total_words >= 100:
            return 0.4
        else:
            return 0.2

    def explain(self, features: Dict) -> Dict:
        """Generate explanations.

        Returns {'highlights': [...], 'suggestions': [...]} built from
        fixed 0.7 (highlight) and 0.5 (suggestion) thresholds on features.
        """
        explanations = {
            'highlights': [],
            'suggestions': []
        }

        if features.get('technical_skills', 0) > 0.7:
            explanations['highlights'].append("Strong technical skills demonstrated")

        if features.get('leadership_score', 0) > 0.7:
            explanations['highlights'].append("Clear leadership experience")

        if features.get('career_alignment', 0) > 0.7:
            explanations['highlights'].append("Well-defined career goals")

        if features.get('communication', 0) > 0.7:
            explanations['highlights'].append("Excellent communication skills")

        if features.get('writing_quality', 0) < 0.5:
            explanations['suggestions'].append("Provide more detailed responses (150-300 words each)")

        if features.get('leadership_score', 0) < 0.5:
            explanations['suggestions'].append("Highlight leadership roles with specific examples")

        if features.get('technical_skills', 0) < 0.5:
            explanations['suggestions'].append("Describe technical projects and skills in detail")

        return explanations

    # Admin functions
    def get_aspect_seeds(self) -> Dict[str, List[str]]:
        """Return current loaded seeds (shallow copy of the dict)."""
        return self.aspect_seeds.copy()

    def update_aspect_seeds(self, new_seeds: Dict[str, List[str]],
                            persist: bool = True) -> Dict:
        """
        Update aspect seeds and recompute centroids
        Returns: stats dict

        Raises ValueError on malformed input (non-dict, non-string keys,
        empty or non-string seed lists). Centroids are rebuilt immediately;
        persistence to self.seeds_path is best effort.
        """
        # Validate
        if not isinstance(new_seeds, dict):
            raise ValueError("new_seeds must be a dict")

        for key, seeds in new_seeds.items():
            if not isinstance(key, str):
                raise ValueError(f"Aspect key must be string, got {type(key)}")
            if not isinstance(seeds, list) or not seeds:
                raise ValueError(f"Seeds for '{key}' must be non-empty list")
            if not all(isinstance(s, str) for s in seeds):
                raise ValueError(f"All seeds for '{key}' must be strings")

        # Update seeds
        self.aspect_seeds = new_seeds.copy()

        # Recompute centroids
        logger.info("Recomputing centroids after seed update")
        self.centroids = self.build_prototypes(self.aspect_seeds, self.model)

        # Persist
        if persist:
            try:
                with open(self.seeds_path, 'w', encoding='utf-8') as f:
                    json.dump(new_seeds, f, indent=2, ensure_ascii=False)
                logger.info(f"Persisted new seeds to {self.seeds_path}")
            except Exception as e:
                logger.error(f"Failed to persist seeds: {e}")

        # Stats
        # NOTE(review): avg_seed_count is np.float64 and utcnow() is
        # deprecated since Python 3.12 (prefer datetime.now(timezone.utc)) —
        # confirm before serializing stats to JSON
        stats = {
            "num_aspects": len(new_seeds),
            "avg_seed_count": np.mean([len(seeds) for seeds in new_seeds.values()]),
            "timestamp": datetime.utcnow().isoformat() + 'Z'
        }

        logger.info(f"Aspect seeds updated: {stats}")
        return stats

    def suggest_seed_expansions(self, corpus_texts: List[str], aspect_key: str,
                                top_n: int = 20) -> List[str]:
        """
        Suggest seed expansions from corpus
        Uses TF-IDF + cosine similarity for lightweight extraction

        Extracts 2-5 word n-grams from up to 100 corpus texts, keeps the
        200 most frequent candidates, and returns the top_n most similar
        to the aspect centroid. Unknown aspect_key returns [].
        """
        if aspect_key not in self.centroids:
            return []

        centroid = self.centroids[aspect_key]

        # Extract candidate phrases from corpus
        from collections import Counter
        import re

        candidates = []
        for text in corpus_texts[:100]:  # Limit corpus
            # Extract 2-5 word n-grams
            words = text.lower().split()
            for n in range(2, 6):
                for i in range(len(words) - n + 1):
                    phrase = ' '.join(words[i:i+n])
                    # Drop short phrases and those containing long digit runs
                    if len(phrase) > 10 and not re.search(r'\d{3,}', phrase):
                        candidates.append(phrase)

        # Count frequency
        phrase_counts = Counter(candidates)
        top_candidates = [phrase for phrase, _ in phrase_counts.most_common(200)]

        if not top_candidates:
            return []

        # Encode and rank by similarity
        candidate_embeddings = self.model.encode(top_candidates, convert_to_tensor=False,
                                                 show_progress_bar=False)
        candidate_embeddings = np.array(candidate_embeddings, dtype=np.float32)

        sims = np.dot(candidate_embeddings, centroid) / (
            np.linalg.norm(candidate_embeddings, axis=1) * np.linalg.norm(centroid) + 1e-8
        )

        # Return top_n, most similar first
        top_indices = np.argsort(sims)[-top_n:][::-1]
        suggestions = [top_candidates[i] for i in top_indices]

        return suggestions
515
+
516
+
517
def get_relevant_aspects_for_question(question_id: str) -> List[str]:
    """Return the aspect keys mapped to *question_id*; [] when unmapped."""
    try:
        return QUESTION_ASPECT_MAP[question_id]
    except KeyError:
        return []
520
+
521
+
522
+ # Flask admin blueprint
523
def register_admin_seed_endpoint(app, text_module: TextModuleV2):
    """Register admin endpoints for seed management.

    Adds two routes under /admin:
      GET  /admin/aspect-seeds  -> current aspect seeds
      POST /admin/aspect-seeds  -> replace seeds; body {"seeds": {...}, "persist": bool}

    Both routes require an X-Admin-Token header matching the
    ADMIN_SEED_TOKEN environment variable.
    """
    import hmac  # local import: only needed once this endpoint is registered
    from flask import Blueprint, request, jsonify

    admin_bp = Blueprint('admin_aspects', __name__, url_prefix='/admin')

    def check_admin_token():
        """Return a (response, 401) pair when auth fails, else None."""
        # `or ''` guards against a missing header (None would make
        # compare_digest raise TypeError).
        token = request.headers.get('X-Admin-Token') or ''
        expected = os.getenv('ADMIN_SEED_TOKEN', 'admin-secret-token')
        # Constant-time comparison avoids leaking token prefixes through
        # response-timing differences (the original `!=` short-circuits).
        if not hmac.compare_digest(token, expected):
            return jsonify({'error': 'Unauthorized'}), 401
        return None

    @admin_bp.route('/aspect-seeds', methods=['GET'])
    def get_seeds():
        """Get current aspect seeds"""
        auth_err = check_admin_token()
        if auth_err:
            return auth_err

        seeds = text_module.get_aspect_seeds()
        return jsonify({
            'success': True,
            'seeds': seeds,
            'num_aspects': len(seeds)
        })

    @admin_bp.route('/aspect-seeds', methods=['POST'])
    def update_seeds():
        """Update aspect seeds"""
        auth_err = check_admin_token()
        if auth_err:
            return auth_err

        # get_json(silent=True) yields None for a missing or invalid JSON
        # body instead of raising, so a malformed request reaches the clean
        # 400 below rather than a 500 (request.json could also be None,
        # making the original data.get(...) raise AttributeError).
        data = request.get_json(silent=True) or {}
        new_seeds = data.get('seeds')
        persist = data.get('persist', True)

        if not new_seeds:
            return jsonify({'error': 'Missing seeds field'}), 400

        try:
            stats = text_module.update_aspect_seeds(new_seeds, persist=persist)
            return jsonify({
                'success': True,
                'message': 'Aspect seeds updated successfully',
                'stats': stats
            })
        except Exception as e:
            # ValueError from validation and unexpected rebuild errors both
            # surface as a 400 with the message, matching the original.
            logger.error(f"Failed to update seeds: {e}")
            return jsonify({'error': str(e)}), 400

    app.register_blueprint(admin_bp)
    logger.info("Registered admin aspect-seed endpoints at /admin/aspect-seeds")
services/universal_module.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Universal Module - Academic & Experience Scoring"""
2
+ import numpy as np
3
+ import re
4
+ from typing import Dict, Tuple
5
+
6
class UniversalModule:
    """Scores based on academic performance and experience.

    Produces a weighted score in [0, 1] from academic fields (CGPA, SGPA
    history, 10th/12th percentages) plus keyword heuristics over free-text
    internship / extracurricular / certification descriptions.
    """

    def __init__(self):
        # Relative contribution of each feature to the final score.
        # The weights sum to 1.0 and the key set must match the features
        # dict built in score().
        self.feature_weights = {
            'cgpa_norm': 0.30,
            'sgpa_trend': 0.15,
            'sgpa_consistency': 0.10,
            'marks_consistency': 0.10,
            'academic_improvement': 0.10,
            'internship_exposure': 0.10,
            'ec_quality': 0.08,
            'cert_quality': 0.07
        }

    def score(self, student_data: Dict) -> Tuple[float, float, Dict]:
        """
        Calculate universal score
        Returns: (score, confidence, features_dict)

        student_data keys read: cgpa, sgpa_sem1..sgpa_sem8, tenth_pct,
        twelfth_pct, internship_text, extracurricular_text,
        certifications_text. Missing numeric fields fall back to a neutral
        0.5 feature value; confidence reflects how many fields were usable.
        """
        features = {}

        # CGPA normalization (0-10 scale)
        # NOTE(review): assumes 'cgpa' is numeric when present — a stored
        # None would raise here before the cgpa > 0 guard below; confirm
        # the caller sanitizes input.
        cgpa = student_data.get('cgpa', 0)
        features['cgpa_norm'] = min(cgpa / 10.0, 1.0)

        # SGPA trend (improvement across semesters) - filter out null values
        sgpa_values = []
        for sem_num in range(1, 9):
            sem_val = student_data.get(f'sgpa_sem{sem_num}')
            if sem_val is not None and sem_val > 0:  # Ignore null/zero values
                sgpa_values.append(sem_val)

        if len(sgpa_values) >= 2:
            # Calculate trend from first to last available semester
            trend = (sgpa_values[-1] - sgpa_values[0]) / 10.0  # Normalize
            features['sgpa_trend'] = max(0, min(trend + 0.5, 1.0))  # Center at 0.5
        else:
            features['sgpa_trend'] = 0.5  # Neutral if insufficient data

        # SGPA consistency (lower std = more consistent = better)
        if len(sgpa_values) >= 3:
            std_dev = np.std(sgpa_values)
            features['sgpa_consistency'] = max(0, 1 - (std_dev / 3.0))  # Inverse relationship
        else:
            features['sgpa_consistency'] = 0.5

        # Marks consistency across 10th, 12th, CGPA
        # (truthiness check: a 0 percentage is treated the same as missing)
        tenth = student_data.get('tenth_pct')
        twelfth = student_data.get('twelfth_pct')

        if tenth and twelfth and cgpa:
            cgpa_pct = (cgpa / 10.0) * 100
            marks_std = np.std([tenth, twelfth, cgpa_pct])
            features['marks_consistency'] = max(0, 1 - (marks_std / 30.0))
        else:
            features['marks_consistency'] = 0.5

        # Academic improvement flag: 1.0 for monotone improvement across
        # 10th -> 12th -> degree, 0.7 for partial, 0.3 for none
        if tenth and twelfth and cgpa:
            cgpa_pct = (cgpa / 10.0) * 100
            if cgpa_pct > twelfth and twelfth > tenth:
                features['academic_improvement'] = 1.0
            elif cgpa_pct > twelfth or twelfth > tenth:
                features['academic_improvement'] = 0.7
            else:
                features['academic_improvement'] = 0.3
        else:
            features['academic_improvement'] = 0.5

        # Extract features from text responses (handle None values)
        internship_text = student_data.get('internship_text') or ''
        ec_text = student_data.get('extracurricular_text') or ''
        cert_text = student_data.get('certifications_text') or ''

        # Internship exposure - extract from text
        features['internship_exposure'] = self._assess_internship_quality(internship_text)

        # Extracurricular quality - extract from text
        features['ec_quality'] = self._assess_extracurricular_quality(ec_text)

        # Certification quality - extract from text
        features['cert_quality'] = self._assess_certification_quality(cert_text)

        # Calculate weighted score (features and feature_weights share the
        # same 8 keys, so this covers every weight exactly once)
        score = sum(features[k] * self.feature_weights[k] for k in features.keys())

        # Calculate confidence based on data completeness
        total_fields = 8
        filled_fields = sum([
            1 if cgpa > 0 else 0,
            1 if len(sgpa_values) >= 2 else 0,
            1 if len(sgpa_values) >= 3 else 0,
            1 if tenth and twelfth else 0,
            1 if tenth and twelfth and cgpa else 0,
            1 if len(internship_text) > 20 else 0,
            1 if len(ec_text) > 20 else 0,
            1 if len(cert_text) > 20 else 0
        ])
        confidence = filled_fields / total_fields

        return score, confidence, features

    def explain(self, features: Dict) -> Dict:
        """Generate explanation for scores.

        Returns up to 3 features above 0.6 as positives and up to 3 below
        0.4 as negatives, each with value and human-readable description.
        """
        explanations = {
            'top_positive_features': [],
            'top_negative_features': []
        }

        # Sort features by value
        sorted_features = sorted(features.items(), key=lambda x: x[1], reverse=True)

        # Top 3 positive
        for feat, val in sorted_features[:3]:
            if val > 0.6:
                explanations['top_positive_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'description': self._get_feature_description(feat, val)
                })

        # Top 3 negative
        for feat, val in sorted_features[-3:]:
            if val < 0.4:
                explanations['top_negative_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'description': self._get_feature_description(feat, val)
                })

        return explanations

    def _assess_internship_quality(self, text: str) -> float:
        """Extract internship quality from text.

        Blend: 0.4 * duration signal + 0.4 * quality keywords + 0.2 * detail
        length. Texts under 20 chars score 0.0.
        """
        if not text or len(text) < 20:
            return 0.0

        score = 0.0
        text_lower = text.lower()

        # Duration indicators: (pattern, multiplier). Numeric patterns are
        # scaled by the multiplier then normalized against 6 months;
        # non-numeric ones contribute the multiplier directly.
        duration_patterns = [
            (r'\b(\d+)\s*months?\b', 1.0),
            (r'\b(\d+)\s*weeks?\b', 0.25),  # ~0.25 converts weeks toward months
            (r'summer\s+internship', 0.5),
            (r'year\s+long|full\s+year|annual', 1.0),
        ]

        max_duration_score = 0.0
        for pattern, multiplier in duration_patterns:
            matches = re.findall(pattern, text_lower)
            if matches:
                # startswith on the raw pattern string distinguishes the two
                # capturing numeric patterns from the keyword-only ones
                if pattern.startswith(r'\b(\d+)'):
                    duration = max([int(m) for m in matches]) * multiplier
                    max_duration_score = max(max_duration_score, min(duration / 6.0, 1.0))
                else:
                    max_duration_score = max(max_duration_score, multiplier)

        score += max_duration_score * 0.4

        # Quality indicators
        quality_keywords = ['company', 'startup', 'corporation', 'project', 'developed',
                            'implemented', 'built', 'deployed', 'managed', 'led']
        quality_count = sum(1 for kw in quality_keywords if kw in text_lower)
        score += min(quality_count / len(quality_keywords), 1.0) * 0.4

        # Length indicates detail
        score += min(len(text) / 500, 1.0) * 0.2

        return min(score, 1.0)

    def _assess_extracurricular_quality(self, text: str) -> float:
        """Extract extracurricular quality from text.

        Blend: 0.4 * leadership keywords + 0.4 * activity keywords +
        0.2 * detail length. Texts under 20 chars score 0.0.
        """
        if not text or len(text) < 20:
            return 0.0

        score = 0.0
        text_lower = text.lower()

        # Leadership indicators (substring match; saturates at 3 hits)
        leadership_keywords = ['led', 'organized', 'president', 'captain', 'head',
                               'coordinator', 'managed', 'founded']
        leadership_count = sum(1 for kw in leadership_keywords if kw in text_lower)
        score += min(leadership_count / 3, 1.0) * 0.4

        # Activity types (saturates at 4 hits)
        activity_keywords = ['club', 'society', 'competition', 'hackathon', 'event',
                             'volunteer', 'sports', 'cultural', 'technical']
        activity_count = sum(1 for kw in activity_keywords if kw in text_lower)
        score += min(activity_count / 4, 1.0) * 0.4

        # Detail level
        score += min(len(text) / 400, 1.0) * 0.2

        return min(score, 1.0)

    def _assess_certification_quality(self, text: str) -> float:
        """Extract certification quality from text.

        Blend: 0.4 * reputable-platform keywords + 0.4 * technical-skill
        keywords + 0.2 * detail length. Texts under 20 chars score 0.0.
        """
        if not text or len(text) < 20:
            return 0.0

        score = 0.0
        text_lower = text.lower()

        # Platform indicators (reputable sources; saturates at 3 hits)
        platform_keywords = ['coursera', 'udemy', 'edx', 'linkedin', 'google',
                             'microsoft', 'aws', 'azure', 'ibm', 'oracle']
        platform_count = sum(1 for kw in platform_keywords if kw in text_lower)
        score += min(platform_count / 3, 1.0) * 0.4

        # Technical skills (saturates at 4 hits)
        tech_keywords = ['python', 'java', 'machine learning', 'data science', 'cloud',
                         'programming', 'development', 'database', 'web', 'mobile']
        tech_count = sum(1 for kw in tech_keywords if kw in text_lower)
        score += min(tech_count / 4, 1.0) * 0.4

        # Detail level
        score += min(len(text) / 400, 1.0) * 0.2

        return min(score, 1.0)

    def _get_feature_description(self, feature: str, value: float) -> str:
        """Get human-readable description of feature.

        Unknown feature keys fall back to the raw key name.
        """
        descriptions = {
            'cgpa_norm': f"CGPA performance: {value*10:.1f}/10",
            'sgpa_trend': "Strong upward trend in semester grades" if value > 0.6 else "Declining semester grades",
            'sgpa_consistency': "Very consistent semester performance" if value > 0.7 else "Inconsistent semester performance",
            'marks_consistency': "Consistent performance across academics" if value > 0.7 else "Variable academic performance",
            'academic_improvement': "Clear improvement over time" if value > 0.7 else "Limited academic growth",
            'internship_exposure': "Strong internship experience" if value > 0.6 else "Limited internship exposure",
            'ec_quality': "Excellent extracurricular involvement" if value > 0.6 else "Limited extracurricular activities",
            'cert_quality': "Strong certification portfolio" if value > 0.6 else "Few professional certifications"
        }
        return descriptions.get(feature, feature)