MakPr016
v2: Upgraded to retrained model with improved PO6-PO11 accuracy, updated NBA definitions
d67b274
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import List, Optional, Dict, Any | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import os | |
| import re | |
# Reference wording of each Program Outcome (PO1-PO11), keyed by PO id.
# FineTunedCOPOMapper embeds these sentences and compares every course
# outcome against them, and the same text is returned to API clients as
# ``po_description``.
OFFICIAL_PO_DEFINITIONS = {
    "PO1": (
        "Apply the knowledge of mathematics, science, engineering "
        "fundamentals, and an engineering specialization to the solution "
        "of complex engineering problems"
    ),
    "PO2": (
        "Identify, formulate, review research literature, and analyze "
        "complex engineering problems reaching substantiated conclusions "
        "using first principles of mathematics, natural sciences, and "
        "engineering sciences"
    ),
    "PO3": (
        "Design solutions for complex engineering problems and design "
        "system components or processes that meet the specified needs "
        "with appropriate consideration for the public health and safety, "
        "and the cultural, societal, and environmental considerations"
    ),
    "PO4": (
        "Use research-based knowledge and research methods including "
        "design of experiments, analysis and interpretation of data, and "
        "synthesis of the information to provide valid conclusions"
    ),
    "PO5": (
        "Create, select, and apply appropriate techniques, resources, and "
        "modern engineering and IT tools including prediction and modeling "
        "to complex engineering activities with an understanding of the "
        "limitations"
    ),
    "PO6": (
        "Apply reasoning informed by the contextual knowledge to assess "
        "societal, health, safety, legal and cultural issues and the "
        "consequent responsibilities relevant to the professional "
        "engineering practice"
    ),
    "PO7": (
        "Understand the impact of the professional engineering solutions "
        "in societal and environmental contexts, and demonstrate the "
        "knowledge of, and need for sustainable development"
    ),
    "PO8": (
        "Apply ethical principles and commit to professional ethics and "
        "responsibilities and norms of the engineering practice"
    ),
    "PO9": (
        "Function effectively as an individual, and as a member or leader "
        "in diverse teams, and in multidisciplinary settings"
    ),
    "PO10": (
        "Communicate effectively on complex engineering activities with "
        "the engineering community and with society at large, such as, "
        "being able to comprehend and write effective reports and design "
        "documentation, make effective presentations, and give and "
        "receive clear instructions"
    ),
    "PO11": (
        "Demonstrate knowledge and understanding of the engineering and "
        "management principles and apply these to one's own work, as a "
        "member and leader in a team, to manage projects and in "
        "multidisciplinary environments"
    ),
}
# Per-PO vocabulary used by FineTunedCOPOMapper._calculate_keyword_score.
# Entries are lower-cased and stripped of punctuation before matching; a
# single-word entry found in the course-outcome text adds one point and a
# multi-word phrase adds two. Keep every list free of repeats so that no term
# is weighted twice by accident (the previous revision listed "modeling",
# "code of conduct" and "independent learning" twice).
PO_KEYWORDS = {
    "PO1": [
        "knowledge", "mathematics", "math", "science", "computing", "engineering",
        "fundamental", "theory", "concept", "principle", "algorithm", "data structure",
        "programming", "software", "hardware", "circuit", "system", "analysis",
        "understand", "explain", "apply", "technical", "computer",
        "physics", "chemistry", "material", "thermodynamics", "mechanics", "electricity",
        "waves", "optics", "equilibrium", "conservation", "kinetics", "diffusion",
        "reaction rates", "calculus", "linear algebra", "differential equations",
        "probability", "statistics", "regression", "interpolation", "transforms",
        "numerical", "integration", "error analysis", "kinematics", "dynamics",
        "fluid flow", "heat transfer", "stress", "strain", "control", "signals",
        "embedded", "database", "OS", "VLSI", "power systems", "machine design",
        "networking", "modelling", "compute", "derive", "formulate", "validate",
        "solve", "approximation", "assumptions", "constraints", "simulation",
        "optimization", "verification", "problem-solving", "quantitative",
        "dimensional", "specialization", "theory-based", "unit analysis",
        "abstraction", "material properties", "conservation laws", "estimation",
        "numerical methods", "numerical integration", "circuits", "control basics",
        "embedded basics", "OS concepts", "data structures", "advanced tools", "codes",
    ],
    "PO2": [
        "identify", "formulate", "analyze", "analysis", "problem", "research",
        "investigate", "investigation", "examine", "evaluate", "evaluation", "assess",
        "assessment", "literature", "study", "review", "complex", "assumptions",
        "constraints", "requirements", "decompose", "root cause", "hypothesis",
        "criteria", "metrics", "trade-offs", "case analysis", "feasibility", "risk",
        "sustainability", "data gathering", "pattern recognition", "model selection",
        "verification", "validation", "evidence", "argumentation", "conclusion",
        "justification", "sensitivity", "sensitivity analysis", "physical principles",
        "experimental", "measurement", "uncertainty", "empirical", "statistical",
        "hypothesis testing", "confidence intervals", "correlation", "multivariate",
        "error propagation", "failure modes", "load cases", "boundary conditions",
        "specifications", "performance limits", "safety factors", "standards",
        "cybersecurity", "grid stability", "bioprocess", "substantiated",
        "phenomena", "empirical evidence", "statistical analysis", "standards baseline",
        "domain constraints", "complex scenarios", "advanced methods", "first principles",
        "literature review", "complex problems",
    ],
    "PO3": [
        "design", "create", "develop", "build", "implement", "implementation",
        "construct", "architecture", "model", "prototype", "system", "component",
        "solution", "innovative", "creative", "synthesize", "specification",
        "requirements", "constraints", "whole-life cost", "net zero", "sustainability",
        "public health", "safety", "stakeholder", "trade-off", "optimization",
        "feasibility", "innovation", "integration", "standards", "compliance",
        "documentation", "design process", "cost analysis", "environmental impact",
        "resource efficiency", "life-cycle assessment", "manufacturability",
        "maintainability", "codes", "project management", "plan", "propose",
        "integrate", "optimize", "specify", "creative solutions", "system components",
        "processes", "meet needs", "appropriate consideration", "cultural",
        "societal", "environmental considerations",
    ],
    "PO4": [
        "experiment", "test", "testing", "measure", "measurement", "data", "analysis",
        "interpret", "interpretation", "validation", "verify", "verification",
        "research", "investigation", "empirical", "benchmark", "evaluate",
        "experimental design", "hypothesis testing", "methodology", "data collection",
        "analyze data", "validate", "model calibration", "research-based",
        "literature review", "protocol", "observation", "inference", "reproducibility",
        "documentation", "research methodology", "peer review", "experimental setup",
        "critical review", "emerging issues", "investigate", "calibrate",
        "design of experiments", "synthesis", "synthesis of information",
        "valid conclusions", "research methods", "research-based knowledge",
        "interpretation of data",
    ],
    "PO5": [
        "tool", "tools", "technology", "software", "framework", "platform", "library",
        "IDE", "programming", "language", "modern", "technique", "method", "approach",
        "implement", "application", "use", "utilize", "simulate", "modeling",
        "calibrate", "CAD", "CAE", "algorithm", "limitations", "benchmarking",
        "tool selection", "automation", "digital twin", "data visualization",
        "computation", "process", "manufacturing", "deployment", "instrumentation",
        "digital tools", "operate", "program", "automate", "simulation",
        "modern engineering tools", "IT tools", "prediction",
        "understanding limitations", "CAD/CAE tools", "modern techniques",
        "appropriate techniques", "resources", "complex engineering activities",
    ],
    "PO6": [
        "society", "social", "environmental", "environment", "sustainability",
        "sustainable", "impact", "ethical", "responsible", "responsibility",
        "green", "energy", "carbon", "climate", "eco", "community", "culture",
        "global", "societal impact", "legal", "health", "safety", "cultural",
        "economic impacts", "standards", "compliance", "lifecycle assessment",
        "regulation", "public welfare", "risk assessment", "ethics", "policy",
        "public health", "safety standards", "cost-benefit", "resource allocation",
        "EIA", "legal framework", "assess", "justify", "comply", "recommend",
        "judge", "critique", "environmental science", "reasoning", "contextual knowledge",
        "societal issues", "health issues", "safety issues", "legal issues",
        "cultural issues", "consequent responsibilities", "professional engineering practice",
        "engineer and society",
    ],
    "PO7": [
        "sustainability", "environmental impact", "resource efficiency", "renewable",
        "pollution", "waste management", "climate change", "conservation",
        "life-cycle assessment", "green design", "eco-friendly", "regulatory compliance",
        "sustainable development", "carbon footprint", "circular economy",
        "biodiversity", "ecosystem", "environmental degradation", "clean energy",
        "water conservation", "soil conservation", "regulation", "public welfare",
        "green technology", "climate policy", "renewable resources",
        "ecological balance", "professional engineering solutions",
        "societal contexts", "environmental contexts", "knowledge of sustainable development",
        "need for sustainable development", "understand impact",
    ],
    "PO8": [
        "ethics", "ethical", "professional", "integrity", "responsibility",
        "responsible", "conduct", "moral", "morality", "values", "principles",
        "principle", "honesty", "fairness", "accountability", "code of conduct",
        "inclusivity", "diversity", "compliance", "governance", "transparency",
        "confidentiality", "conflict of interest", "professional ethics",
        "moral values", "ethical behavior", "ethical dilemma", "social responsibility",
        "professional standards", "bias mitigation",
        "inclusive behavior", "professional responsibility", "ethical decision-making",
        "professional norms", "ethical framework", "adhere", "demonstrate",
        "respect", "reflect", "act", "commit", "advocate", "ethical principles",
        "commit to ethics", "norms of engineering practice", "professional ethics and responsibilities",
    ],
    "PO9": [
        "team", "teams", "collaborate", "collaboration", "cooperative", "cooperation",
        "group", "leadership", "leader", "member", "members", "teamwork", "collective",
        "peer", "diverse", "diversity", "multicultural", "together", "coordinate",
        "roles", "team dynamics", "multidisciplinary", "conflict resolution",
        "accountability", "contribution", "communication", "delegation", "motivation",
        "feedback", "participation", "project management", "multidisciplinary integration",
        "cooperate", "lead", "participate", "contribute", "support", "facilitate",
        "manage", "team building", "interpersonal skills", "group work",
        "team coordination", "collaborative problem-solving", "function effectively",
        "individual", "member or leader", "diverse teams", "multidisciplinary settings",
    ],
    "PO10": [
        "communicate", "communication", "present", "presentation", "document",
        "documentation", "report", "write", "writing", "speak", "speaking",
        "explain", "articulate", "technical writing", "stakeholder", "audience",
        "clarity", "visualization", "inclusivity", "language differences",
        "comprehension", "oral communication", "written communication", "feedback",
        "listening", "negotiation", "reporting", "audience analysis", "illustrate",
        "summarize", "interpret", "discuss", "listen", "effective communication",
        "clear instructions", "design documentation", "presentation skills",
        "effective reports", "communicate effectively", "engineering community",
        "society at large", "comprehend", "write effective reports",
        "make effective presentations", "give and receive instructions",
        "complex engineering activities",
    ],
    "PO11": [
        "project", "projects", "management", "manage", "plan", "planning", "schedule",
        "scheduling", "resource", "resources", "budget", "cost", "timeline", "milestone",
        "risk", "decision", "economic", "strategy", "organize", "organization",
        "cost analysis", "resource allocation", "feasibility", "management principles",
        "economics", "decision-making", "leadership", "project execution", "monitoring",
        "evaluation", "procurement", "stakeholder management", "cost estimation",
        "budgeting", "risk management", "allocate", "execute", "coordinate",
        "financial management", "project planning", "learning", "learn", "adapt",
        "adapting", "adaptability", "emerging", "new", "continuous", "lifelong",
        "skill", "skills", "development", "growth", "technology", "technologies",
        "trend", "trends", "innovation", "self-learn", "update", "evolve", "change",
        "technological change", "keep abreast", "critical thinking", "reflect",
        "independent learning", "update skills", "professional development", "curiosity",
        "continuous improvement", "resilience", "self-learning", "independent study",
        "critical review", "emerging issues", "reflection", "continuous learning",
        "skill enhancement", "knowledge updating", "pursue", "explore", "inquire",
        "improve", "recognize need", "preparation", "ability to engage",
        "life-long learning", "broadest context",
        "engineering and management principles", "apply to own work",
        "member and leader in team", "manage projects", "multidisciplinary environments",
        "emerging technologies",
    ],
}
class FineTunedCOPOMapper:
    """Score a Course Outcome (CO) statement against every Program Outcome.

    The sentence-embedding model is loaded once, the texts in
    ``OFFICIAL_PO_DEFINITIONS`` are embedded up front, and each CO is then
    ranked either by cosine similarity alone (``map_co_to_pos_semantic``) or
    by a weighted blend of similarity and keyword overlap
    (``map_co_to_pos_hybrid``).
    """

    # (exclusive lower bound, strength, confidence), checked from the top down.
    _STRENGTH_BANDS = (
        (0.7, 3, "high"),
        (0.5, 2, "medium"),
        (0.3, 1, "low"),
    )
    # Weights of the two components of the hybrid score.
    _SEMANTIC_WEIGHT = 0.85
    _KEYWORD_WEIGHT = 0.15
    # Compiled once: text normalisation runs for every keyword of every PO.
    _PUNCTUATION = re.compile(r'[^\w\s]')
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self):
        """Load the model and embed the PO definitions.

        The Hugging Face access token is read from the ``HF_TOKEN``
        environment variable; ``None`` is passed through when it is unset.
        """
        hf_token = os.environ.get("HF_TOKEN")
        print("Loading private model from Hugging Face...")
        # NOTE(review): newer sentence-transformers releases deprecate
        # ``use_auth_token`` in favour of ``token`` -- confirm against the
        # pinned version before switching.
        self.model = SentenceTransformer("MakPr016/co-po-finetuned-model-v2", use_auth_token=hf_token)
        print("Model loaded successfully!")
        self.po_embeddings = {}
        self._precompute_po_embeddings()

    def _precompute_po_embeddings(self):
        """Embed every official PO definition and cache it by PO id."""
        for po_id, po_text in OFFICIAL_PO_DEFINITIONS.items():
            self.po_embeddings[po_id] = self.model.encode([po_text])[0]

    def _normalize_text(self, text):
        """Lower-case *text*, turn punctuation into spaces, collapse whitespace."""
        without_punctuation = self._PUNCTUATION.sub(' ', text.lower())
        return self._WHITESPACE.sub(' ', without_punctuation).strip()

    @classmethod
    def _strength_for_score(cls, score):
        """Return the ``(strength, confidence)`` band that *score* falls into."""
        for lower_bound, strength, confidence in cls._STRENGTH_BANDS:
            if score > lower_bound:
                return strength, confidence
        return 0, "very low"

    def _semantic_scores(self, co_text):
        """Return ``{po_id: cosine similarity}`` between *co_text* and each PO."""
        co_embedding = self.model.encode([co_text])[0]
        return {
            po_id: float(cosine_similarity([co_embedding], [po_embedding])[0][0])
            for po_id, po_embedding in self.po_embeddings.items()
        }

    def _calculate_keyword_score(self, co_text, po_id, keywords=None):
        """Rate how much of a PO's vocabulary appears in *co_text*.

        Args:
            co_text: Raw course-outcome statement.
            po_id: Key into ``PO_KEYWORDS`` (used when *keywords* is omitted).
            keywords: Optional keyword list to score against instead of the
                ``PO_KEYWORDS`` entry.

        Returns:
            ``0.0`` when nothing matches, ``0.3`` for 1-3 points, ``0.6`` for
            4-6 points, and between ``0.6`` and ``1.0`` above that. A matching
            single word is worth one point, a matching phrase two.
        """
        if keywords is None:
            keywords = PO_KEYWORDS.get(po_id, [])

        # Normalise once, dropping entries that become empty (they would match
        # any text) and repeats (they would be counted twice).
        unique_keywords = list(dict.fromkeys(
            normalized
            for normalized in map(self._normalize_text, keywords)
            if normalized
        ))
        if not unique_keywords:
            return 0.0

        co_normalized = self._normalize_text(co_text)
        co_words = set(co_normalized.split())
        # Padding lets phrases be matched on word boundaries, so "root cause"
        # no longer hits inside "uproot causes".
        padded_text = f" {co_normalized} "

        matched_points = 0
        for keyword in unique_keywords:
            if " " in keyword:
                if f" {keyword} " in padded_text:
                    matched_points += 2
            elif keyword in co_words:
                matched_points += 1

        if matched_points == 0:
            return 0.0
        if matched_points <= 3:
            return 0.3
        if matched_points <= 6:
            return 0.6
        # The ratio alone can fall below the previous tier on long keyword
        # lists (7 points out of 100 entries is 0.21); floor it at 0.6 so more
        # matches never lower the score.
        return max(0.6, min(1.0, matched_points / len(unique_keywords) * 3.0))

    def map_co_to_pos_semantic(self, co_text):
        """Rank all POs for *co_text* by embedding similarity only.

        Returns:
            One dict per PO, sorted by descending ``score``, with the keys
            ``po_id``, ``score``, ``semantic_score``, ``strength``,
            ``po_description``, ``confidence`` and ``method``.
        """
        results = []
        for po_id, similarity in self._semantic_scores(co_text).items():
            strength, confidence = self._strength_for_score(similarity)
            results.append({
                'po_id': po_id,
                'score': round(similarity, 3),
                'semantic_score': round(similarity, 3),
                'strength': strength,
                'po_description': OFFICIAL_PO_DEFINITIONS[po_id],
                'confidence': confidence,
                'method': 'semantic_only',
            })
        return sorted(results, key=lambda x: x['score'], reverse=True)

    def map_co_to_pos_hybrid(self, co_text):
        """Rank all POs for *co_text* by similarity blended with keyword overlap.

        Returns:
            One dict per PO, sorted by descending ``score``, with the same keys
            as ``map_co_to_pos_semantic`` plus ``keyword_score``.
        """
        results = []
        for po_id, semantic_score in self._semantic_scores(co_text).items():
            keyword_score = self._calculate_keyword_score(co_text, po_id)
            final_score = (self._SEMANTIC_WEIGHT * semantic_score) + (self._KEYWORD_WEIGHT * keyword_score)
            strength, confidence = self._strength_for_score(final_score)
            results.append({
                'po_id': po_id,
                'score': round(final_score, 3),
                'semantic_score': round(semantic_score, 3),
                'keyword_score': round(keyword_score, 3),
                'strength': strength,
                'po_description': OFFICIAL_PO_DEFINITIONS[po_id],
                'confidence': confidence,
                'method': 'hybrid',
            })
        return sorted(results, key=lambda x: x['score'], reverse=True)
app = FastAPI(title="CO-PO Mapping API", version="2.0.0")
# Any origin may call the API, so credentialed requests are switched off: the
# FastAPI CORS documentation states that allow_origins cannot be ["*"] when
# allow_credentials is True. No handler visible in this file reads cookies or
# authorization headers. To support credentialed clients, list their origins
# explicitly instead of re-enabling the flag with the wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Shared FineTunedCOPOMapper instance; stays None until startup() has run.
mapper = None
async def startup():
    """Create the shared mapper, which loads the model and embeds the POs."""
    # NOTE(review): no registration of this coroutine (decorator or
    # add_event_handler call) is visible in this part of the file -- confirm
    # it is wired to the application's startup, otherwise ``mapper`` stays None.
    global mapper
    mapper = FineTunedCOPOMapper()
# Request body carrying a single course-outcome statement.
class CORequest(BaseModel):
    # Statement to map; the handlers reject it when it is blank.
    co_text: str
# Request body carrying several course-outcome statements at once.
class BatchCORequest(BaseModel):
    # Statements to map; the batch handlers skip blank entries.
    co_texts: List[str]
    # Largest batch the handlers accept for this request.
    # NOTE(review): this limit is supplied by the client, so it does not bound
    # the work the server does -- consider enforcing a server-side cap.
    max_cos: int = 50
# One CO-to-PO match, mirroring the dicts built by FineTunedCOPOMapper.
class POMapping(BaseModel):
    # Program Outcome identifier, e.g. "PO3".
    po_id: str
    # Ranking score, rounded to three decimals.
    score: float
    # Cosine similarity between the CO and PO embeddings.
    semantic_score: float
    # Keyword-overlap score; only the hybrid method fills it in.
    keyword_score: Optional[float] = None
    # Mapping strength from 0 (none) to 3 (strong).
    strength: int
    # Definition text of the Program Outcome.
    po_description: str
    # "high", "medium", "low" or "very low".
    confidence: str
    # "semantic_only" or "hybrid".
    method: str
# Response for a single course outcome.
class MappingResponse(BaseModel):
    # The statement that was mapped, echoed back unchanged.
    co_text: str
    # Number of entries in ``mappings``.
    total_pos: int
    # Scoring method that produced the mappings.
    method: str
    # One entry per Program Outcome, best match first.
    mappings: List[POMapping]
# Response for a batch of course outcomes.
class BatchMappingResponse(BaseModel):
    # Number of entries in ``results`` (blank inputs are not counted).
    total_cos: int
    # Scoring method that produced the mappings.
    method: str
    # Per-CO dicts with the keys co_text, total_pos, method and mappings.
    results: List[Dict[str, Any]]
# Landing endpoint: reports the service name, version and status.
# NOTE(review): no route decorator is visible for the handlers in this part of
# the file -- confirm they are registered on ``app``.
async def root():
    return dict(message="CO-PO Mapping API", version="2.0.0", status="active")
# Health probe: always "healthy", plus whether the shared mapper exists yet.
async def health():
    model_ready = mapper is not None
    return {"status": "healthy", "model_loaded": model_ready}
async def map_semantic(request: CORequest):
    """Map one course outcome to all POs using embedding similarity only.

    Raises:
        HTTPException: 400 when ``co_text`` is blank; 503 when the model has
            not been loaded yet.
    """
    if not request.co_text.strip():
        raise HTTPException(400, "CO text cannot be empty")
    # Without this guard a request arriving before the mapper exists fails
    # with an AttributeError on None instead of a clear status.
    if mapper is None:
        raise HTTPException(503, "Model is not loaded yet")
    results = mapper.map_co_to_pos_semantic(request.co_text)
    return MappingResponse(
        co_text=request.co_text,
        total_pos=len(results),
        method="semantic_only",
        mappings=[POMapping(**r) for r in results],
    )
async def map_hybrid(request: CORequest):
    """Map one course outcome to all POs using the hybrid score.

    Raises:
        HTTPException: 400 when ``co_text`` is blank; 503 when the model has
            not been loaded yet.
    """
    if not request.co_text.strip():
        raise HTTPException(400, "CO text cannot be empty")
    # Without this guard a request arriving before the mapper exists fails
    # with an AttributeError on None instead of a clear status.
    if mapper is None:
        raise HTTPException(503, "Model is not loaded yet")
    results = mapper.map_co_to_pos_hybrid(request.co_text)
    return MappingResponse(
        co_text=request.co_text,
        total_pos=len(results),
        method="hybrid",
        mappings=[POMapping(**r) for r in results],
    )
async def map_batch_semantic(request: BatchCORequest):
    """Map every course outcome in the batch using embedding similarity only.

    Blank entries are skipped, so ``results`` can be shorter than
    ``co_texts``.

    Raises:
        HTTPException: 400 when the batch is empty or larger than
            ``max_cos``; 503 when the model has not been loaded yet.
    """
    if not request.co_texts:
        raise HTTPException(400, "At least one CO text required")
    if len(request.co_texts) > request.max_cos:
        raise HTTPException(400, f"Maximum {request.max_cos} COs allowed per batch")
    # Without this guard a request arriving before the mapper exists fails
    # with an AttributeError on None instead of a clear status.
    if mapper is None:
        raise HTTPException(503, "Model is not loaded yet")
    results = []
    for co_text in request.co_texts:
        if not co_text or not co_text.strip():
            continue
        mappings = mapper.map_co_to_pos_semantic(co_text)
        results.append({
            "co_text": co_text,
            "total_pos": len(mappings),
            "method": "semantic_only",
            "mappings": mappings,
        })
    return BatchMappingResponse(total_cos=len(results), method="semantic_only", results=results)
async def map_batch_hybrid(request: BatchCORequest):
    """Map every course outcome in the batch using the hybrid score.

    Blank entries are skipped, so ``results`` can be shorter than
    ``co_texts``.

    Raises:
        HTTPException: 400 when the batch is empty or larger than
            ``max_cos``; 503 when the model has not been loaded yet.
    """
    if not request.co_texts:
        raise HTTPException(400, "At least one CO text required")
    if len(request.co_texts) > request.max_cos:
        raise HTTPException(400, f"Maximum {request.max_cos} COs allowed per batch")
    # Without this guard a request arriving before the mapper exists fails
    # with an AttributeError on None instead of a clear status.
    if mapper is None:
        raise HTTPException(503, "Model is not loaded yet")
    results = []
    for co_text in request.co_texts:
        if not co_text or not co_text.strip():
            continue
        mappings = mapper.map_co_to_pos_hybrid(co_text)
        results.append({
            "co_text": co_text,
            "total_pos": len(mappings),
            "method": "hybrid",
            "mappings": mappings,
        })
    return BatchMappingResponse(total_cos=len(results), method="hybrid", results=results)