MakPr016 committed on
Commit
ab660b6
·
1 Parent(s): 3435694

Fixed build

Browse files
Files changed (1) hide show
  1. app.py +112 -159
app.py CHANGED
@@ -11,8 +11,6 @@ from dotenv import load_dotenv
11
 
12
  load_dotenv()
13
 
14
-
15
-
16
  OFFICIAL_PO_DEFINITIONS = {
17
  "PO1": "Apply the knowledge of mathematics, science, engineering fundamentals, and an engineering specialization to the solution of complex engineering problems",
18
  "PO2": "Identify, formulate, review research literature, and analyze complex engineering problems reaching substantiated conclusions using first principles of mathematics, natural sciences, and engineering sciences",
@@ -27,7 +25,6 @@ OFFICIAL_PO_DEFINITIONS = {
27
  "PO11": "Demonstrate knowledge and understanding of the engineering and management principles and apply these to one's own work, as a member and leader in a team, to manage projects and in multidisciplinary environments"
28
  }
29
 
30
-
31
  BLOOM_LEVEL_DEFINITIONS = {
32
  "Remember": "Recall facts, terms, basic concepts, and answers without necessarily understanding",
33
  "Understand": "Demonstrate understanding of facts and ideas by organizing, comparing, translating, interpreting",
@@ -37,177 +34,166 @@ BLOOM_LEVEL_DEFINITIONS = {
37
  "Create": "Compile information together in a different way by combining elements in new patterns or proposing alternative solutions"
38
  }
39
 
40
-
41
  PO_KEYWORDS = {
42
  "PO1": [
43
- "knowledge", "mathematics", "math", "science", "computing", "engineering",
44
- "fundamental", "theory", "concept", "principle", "algorithm", "data structure",
45
- "programming", "software", "hardware", "circuit", "system", "analysis",
46
  "understand", "explain", "apply", "technical", "computer",
47
  "physics", "chemistry", "material", "thermodynamics", "mechanics", "electricity",
48
  "waves", "optics", "equilibrium", "conservation", "kinetics", "diffusion",
49
- "reaction rates", "calculus", "linear algebra", "differential equations",
50
- "probability", "statistics", "regression", "interpolation", "transforms",
51
- "numerical", "integration", "error analysis", "kinematics", "dynamics",
52
- "fluid flow", "heat transfer", "stress", "strain", "control", "signals",
53
- "embedded", "database", "OS", "VLSI", "power systems", "machine design",
54
- "networking", "modelling", "compute", "derive", "formulate", "validate",
55
- "solve", "approximation", "assumptions", "constraints", "simulation",
56
- "optimization", "verification", "problem-solving", "quantitative",
57
- "dimensional", "specialization", "theory-based", "unit analysis",
58
  "abstraction", "material properties", "conservation laws", "estimation",
59
  "numerical methods", "numerical integration", "circuits", "control basics",
60
  "embedded basics", "OS concepts", "data structures", "advanced tools", "codes"
61
  ],
62
-
63
  "PO2": [
64
- "identify", "formulate", "analyze", "analysis", "problem", "research",
65
- "investigate", "investigation", "examine", "evaluate", "evaluation", "assess",
66
- "assessment", "literature", "study", "review", "complex", "assumptions",
67
- "constraints", "requirements", "decompose", "root cause", "hypothesis",
68
- "criteria", "metrics", "trade-offs", "case analysis", "feasibility", "risk",
69
- "sustainability", "data gathering", "pattern recognition", "model selection",
70
- "verification", "validation", "evidence", "argumentation", "conclusion",
71
- "justification", "sensitivity", "sensitivity analysis", "physical principles",
72
- "experimental", "measurement", "uncertainty", "empirical", "statistical",
73
- "hypothesis testing", "confidence intervals", "correlation", "multivariate",
74
- "error propagation", "failure modes", "load cases", "boundary conditions",
75
- "specifications", "performance limits", "safety factors", "standards",
76
- "cybersecurity", "grid stability", "bioprocess", "substantiated",
77
  "phenomena", "empirical evidence", "statistical analysis", "standards baseline",
78
  "domain constraints", "complex scenarios", "advanced methods", "first principles",
79
  "literature review", "complex problems"
80
  ],
81
-
82
  "PO3": [
83
- "design", "create", "develop", "build", "implement", "implementation",
84
- "construct", "architecture", "model", "prototype", "system", "component",
85
- "solution", "innovative", "creative", "synthesize", "specification",
86
- "requirements", "constraints", "whole-life cost", "net zero", "sustainability",
87
- "public health", "safety", "stakeholder", "trade-off", "optimization",
88
- "feasibility", "innovation", "integration", "standards", "compliance",
89
- "documentation", "design process", "cost analysis", "environmental impact",
90
- "resource efficiency", "life-cycle assessment", "manufacturability",
91
- "maintainability", "codes", "project management", "plan", "propose",
92
  "integrate", "optimize", "specify", "creative solutions", "system components",
93
- "processes", "meet needs", "appropriate consideration", "cultural",
94
  "societal", "environmental considerations"
95
  ],
96
-
97
  "PO4": [
98
- "experiment", "test", "testing", "measure", "measurement", "data", "analysis",
99
- "interpret", "interpretation", "validation", "verify", "verification",
100
  "research", "investigation", "empirical", "benchmark", "evaluate",
101
- "experimental design", "hypothesis testing", "methodology", "data collection",
102
- "analyze data", "validate", "model calibration", "research-based",
103
- "literature review", "protocol", "observation", "inference", "reproducibility",
104
- "documentation", "research methodology", "peer review", "experimental setup",
105
  "critical review", "emerging issues", "investigate", "calibrate",
106
- "design of experiments", "synthesis", "synthesis of information",
107
  "valid conclusions", "research methods", "research-based knowledge",
108
  "interpretation of data"
109
  ],
110
-
111
  "PO5": [
112
- "tool", "tools", "technology", "software", "framework", "platform", "library",
113
- "IDE", "programming", "language", "modern", "technique", "method", "approach",
114
- "implement", "application", "use", "utilize", "simulate", "modeling",
115
- "calibrate", "CAD", "CAE", "algorithm", "limitations", "benchmarking",
116
- "tool selection", "automation", "digital twin", "data visualization",
117
- "computation", "process", "manufacturing", "deployment", "instrumentation",
118
  "digital tools", "operate", "program", "automate", "simulation",
119
  "modern engineering tools", "IT tools", "prediction", "modeling",
120
  "understanding limitations", "CAD/CAE tools", "modern techniques",
121
  "appropriate techniques", "resources", "complex engineering activities"
122
  ],
123
-
124
  "PO6": [
125
- "society", "social", "environmental", "environment", "sustainability",
126
- "sustainable", "impact", "ethical", "responsible", "responsibility",
127
- "green", "energy", "carbon", "climate", "eco", "community", "culture",
128
- "global", "societal impact", "legal", "health", "safety", "cultural",
129
- "economic impacts", "standards", "compliance", "lifecycle assessment",
130
- "regulation", "public welfare", "risk assessment", "ethics", "policy",
131
- "public health", "safety standards", "cost-benefit", "resource allocation",
132
- "EIA", "legal framework", "assess", "justify", "comply", "recommend",
133
  "judge", "critique", "environmental science", "reasoning", "contextual knowledge",
134
  "societal issues", "health issues", "safety issues", "legal issues",
135
  "cultural issues", "consequent responsibilities", "professional engineering practice",
136
  "engineer and society"
137
  ],
138
-
139
  "PO7": [
140
- "sustainability", "environmental impact", "resource efficiency", "renewable",
141
- "pollution", "waste management", "climate change", "conservation",
142
  "life-cycle assessment", "green design", "eco-friendly", "regulatory compliance",
143
- "sustainable development", "carbon footprint", "circular economy",
144
- "biodiversity", "ecosystem", "environmental degradation", "clean energy",
145
- "water conservation", "soil conservation", "regulation", "public welfare",
146
- "green technology", "climate policy", "renewable resources",
147
- "ecological balance", "professional engineering solutions",
148
  "societal contexts", "environmental contexts", "knowledge of sustainable development",
149
  "need for sustainable development", "understand impact"
150
  ],
151
-
152
  "PO8": [
153
- "ethics", "ethical", "professional", "integrity", "responsibility",
154
- "responsible", "conduct", "moral", "morality", "values", "principles",
155
  "principle", "honesty", "fairness", "accountability", "code of conduct",
156
- "inclusivity", "diversity", "compliance", "governance", "transparency",
157
- "confidentiality", "conflict of interest", "professional ethics",
158
- "moral values", "ethical behavior", "ethical dilemma", "social responsibility",
159
- "professional standards", "code of conduct", "bias mitigation",
160
  "inclusive behavior", "professional responsibility", "ethical decision-making",
161
- "professional norms", "ethical framework", "adhere", "demonstrate",
162
  "respect", "reflect", "act", "commit", "advocate", "ethical principles",
163
  "commit to ethics", "norms of engineering practice", "professional ethics and responsibilities"
164
  ],
165
-
166
  "PO9": [
167
- "team", "teams", "collaborate", "collaboration", "cooperative", "cooperation",
168
- "group", "leadership", "leader", "member", "members", "teamwork", "collective",
169
- "peer", "diverse", "diversity", "multicultural", "together", "coordinate",
170
- "roles", "team dynamics", "multidisciplinary", "conflict resolution",
171
- "accountability", "contribution", "communication", "delegation", "motivation",
172
  "feedback", "participation", "project management", "multidisciplinary integration",
173
- "cooperate", "lead", "participate", "contribute", "support", "facilitate",
174
  "manage", "team building", "interpersonal skills", "group work",
175
  "team coordination", "collaborative problem-solving", "function effectively",
176
  "individual", "member or leader", "diverse teams", "multidisciplinary settings"
177
  ],
178
-
179
  "PO10": [
180
- "communicate", "communication", "present", "presentation", "document",
181
- "documentation", "report", "write", "writing", "speak", "speaking",
182
  "explain", "articulate", "technical writing", "stakeholder", "audience",
183
- "clarity", "visualization", "inclusivity", "language differences",
184
- "comprehension", "oral communication", "written communication", "feedback",
185
- "listening", "negotiation", "reporting", "audience analysis", "illustrate",
186
- "summarize", "interpret", "discuss", "listen", "effective communication",
187
  "clear instructions", "design documentation", "presentation skills",
188
  "effective reports", "communicate effectively", "engineering community",
189
- "society at large", "comprehend", "write effective reports",
190
  "make effective presentations", "give and receive instructions",
191
  "complex engineering activities"
192
  ],
193
-
194
  "PO11": [
195
- "project", "projects", "management", "manage", "plan", "planning", "schedule",
196
- "scheduling", "resource", "resources", "budget", "cost", "timeline", "milestone",
197
  "risk", "decision", "economic", "strategy", "organize", "organization",
198
- "cost analysis", "resource allocation", "feasibility", "management principles",
199
- "economics", "decision-making", "leadership", "project execution", "monitoring",
200
- "evaluation", "procurement", "stakeholder management", "cost estimation",
201
  "budgeting", "risk management", "allocate", "execute", "coordinate",
202
- "financial management", "project planning", "learning", "learn", "adapt",
203
- "adapting", "adaptability", "emerging", "new", "continuous", "lifelong",
204
- "skill", "skills", "development", "growth", "technology", "technologies",
205
  "trend", "trends", "innovation", "self-learn", "update", "evolve", "change",
206
- "technological change", "keep abreast", "critical thinking", "reflect",
207
- "independent learning", "update skills", "professional development", "curiosity",
208
- "continuous improvement", "resilience", "self-learning", "independent study",
209
- "critical review", "emerging issues", "reflection", "continuous learning",
210
- "skill enhancement", "knowledge updating", "pursue", "explore", "inquire",
211
  "improve", "recognize need", "preparation", "ability to engage",
212
  "independent learning", "life-long learning", "broadest context",
213
  "engineering and management principles", "apply to own work",
@@ -216,16 +202,11 @@ PO_KEYWORDS = {
216
  ]
217
  }
218
 
219
-
220
  class FineTunedCOPOMapper:
221
  def __init__(self):
222
  print("Loading model...")
223
  print(f"===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
224
-
225
- # Check if HF_TOKEN is available (for HuggingFace Spaces deployment)
226
  hf_token = os.environ.get("HF_TOKEN")
227
-
228
- # Try HuggingFace first if token is available (for Spaces deployment with private model)
229
  if hf_token:
230
  print("HF_TOKEN found. Attempting to load from HuggingFace (private model)...")
231
  try:
@@ -235,15 +216,19 @@ class FineTunedCOPOMapper:
235
  trust_remote_code=False
236
  )
237
  print("✓ Model loaded successfully from HuggingFace")
238
- return
239
  except Exception as e:
240
  print(f"✗ HuggingFace load failed: {str(e)}")
241
  print("Falling back to local model...")
242
-
243
- # Fall back to local model path (for local development)
 
 
 
 
 
 
244
  model_path = os.path.join(os.path.dirname(__file__), "..", "co_po_bloom_model_v3_finetune")
245
  model_path = os.path.abspath(model_path)
246
-
247
  try:
248
  print(f"Attempting to load from local path: {model_path}")
249
  self.model = SentenceTransformer(
@@ -258,22 +243,18 @@ class FineTunedCOPOMapper:
258
  "Please ensure either HF_TOKEN is set (for private model) or "
259
  "the model files exist in ../co_po_bloom_model_v3_finetuned/"
260
  )
261
-
262
- self.po_embeddings = {}
263
- self.bloom_embeddings = {}
264
- self._precompute_embeddings()
265
-
266
  def _precompute_embeddings(self):
267
  for po_id, po_text in OFFICIAL_PO_DEFINITIONS.items():
268
  self.po_embeddings[po_id] = self.model.encode([po_text])[0]
269
  for level, definition in BLOOM_LEVEL_DEFINITIONS.items():
270
  self.bloom_embeddings[level] = self.model.encode([definition])[0]
271
-
272
  def _normalize_text(self, text):
273
  text = text.lower()
274
  text = re.sub(r'[^\w\s]', ' ', text)
275
  return re.sub(r'\s+', ' ', text).strip()
276
-
277
  def _calculate_keyword_score(self, co_text, po_id):
278
  co_normalized = self._normalize_text(co_text)
279
  co_words = set(co_normalized.split())
@@ -297,17 +278,15 @@ class FineTunedCOPOMapper:
297
  return 0.6
298
  else:
299
  return min(1.0, matched_count / len(keywords) * 3.0)
300
-
301
  def _apply_constraints(self, results, co_text):
302
  po_scores = {r['po_id']: r['score'] for r in results}
303
-
304
  po_hierarchy = ['PO1', 'PO2', 'PO3', 'PO4']
305
  for i in range(len(po_hierarchy) - 1):
306
  current_po = po_hierarchy[i]
307
  next_po = po_hierarchy[i + 1]
308
  if po_scores[current_po] < po_scores[next_po]:
309
  po_scores[next_po] = po_scores[current_po]
310
-
311
  po7_keywords = [
312
  "sustainability", "environmental", "resource efficiency", "renewable",
313
  "pollution", "waste", "climate", "conservation", "eco", "green",
@@ -315,7 +294,6 @@ class FineTunedCOPOMapper:
315
  ]
316
  co_lower = co_text.lower()
317
  po7_keyword_matches = sum(1 for keyword in po7_keywords if keyword in co_lower)
318
-
319
  if po7_keyword_matches >= 3:
320
  po_scores['PO7'] = 0.8
321
  elif po7_keyword_matches == 2:
@@ -324,13 +302,11 @@ class FineTunedCOPOMapper:
324
  po_scores['PO7'] = 0.6
325
  else:
326
  po_scores['PO7'] = 0.4
327
-
328
  po11_keywords = [
329
  "project", "management", "plan", "budget", "schedule", "resource",
330
  "timeline", "milestone", "risk", "team", "coordinate", "execute"
331
  ]
332
  po11_keyword_matches = sum(1 for keyword in po11_keywords if keyword in co_lower)
333
-
334
  if po11_keyword_matches >= 3:
335
  po_scores['PO11'] = 0.8
336
  elif po11_keyword_matches == 2:
@@ -339,28 +315,24 @@ class FineTunedCOPOMapper:
339
  po_scores['PO11'] = 0.6
340
  else:
341
  po_scores['PO11'] = 0.4
342
-
343
  for result in results:
344
  result['score'] = round(po_scores[result['po_id']], 3)
345
-
346
- return sorted(results, key=lambda x: x['score'], reverse=True)
347
-
348
  def predict_bloom_level(self, co_text):
349
  co_embedding = self.model.encode([co_text])[0]
350
  bloom_scores = {}
351
  for level, bloom_embedding in self.bloom_embeddings.items():
352
  similarity = float(cosine_similarity([co_embedding], [bloom_embedding])[0][0])
353
  bloom_scores[level] = round(similarity, 4)
354
-
355
  predicted_level = max(bloom_scores.items(), key=lambda x: x[1])
356
-
357
  return {
358
  'predicted_level': predicted_level[0],
359
  'confidence': predicted_level[1],
360
  'all_scores': bloom_scores,
361
  'description': BLOOM_LEVEL_DEFINITIONS[predicted_level[0]]
362
  }
363
-
364
  def map_co_to_pos_semantic(self, co_text):
365
  co_embedding = self.model.encode([co_text])[0]
366
  results = []
@@ -385,7 +357,7 @@ class FineTunedCOPOMapper:
385
  })
386
  results = self._apply_constraints(results, co_text)
387
  return results
388
-
389
  def map_co_to_pos_hybrid(self, co_text):
390
  co_embedding = self.model.encode([co_text])[0]
391
  results = []
@@ -414,31 +386,25 @@ class FineTunedCOPOMapper:
414
  results = self._apply_constraints(results, co_text)
415
  return results
416
 
417
-
418
  app = FastAPI(title="CO-PO Mapping API", version="3.0.0 (with Bloom's)")
419
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
420
 
421
-
422
  mapper = None
423
 
424
-
425
  @app.on_event("startup")
426
  async def startup():
427
  global mapper
428
  mapper = FineTunedCOPOMapper()
429
 
430
-
431
  class CORequest(BaseModel):
432
  co_text: str
433
  include_bloom: bool = True
434
 
435
-
436
  class BatchCORequest(BaseModel):
437
  co_texts: List[str]
438
  include_bloom: bool = True
439
  max_cos: int = 50
440
 
441
-
442
  class POMapping(BaseModel):
443
  po_id: str
444
  score: float
@@ -449,14 +415,12 @@ class POMapping(BaseModel):
449
  confidence: str
450
  method: str
451
 
452
-
453
  class BloomPrediction(BaseModel):
454
  predicted_level: str
455
  confidence: float
456
  all_scores: Dict[str, float]
457
  description: str
458
 
459
-
460
  class MappingResponse(BaseModel):
461
  co_text: str
462
  total_pos: int
@@ -464,13 +428,11 @@ class MappingResponse(BaseModel):
464
  mappings: List[POMapping]
465
  bloom_prediction: Optional[BloomPrediction] = None
466
 
467
-
468
  class BatchMappingResponse(BaseModel):
469
  total_cos: int
470
  method: str
471
  results: List[Dict[str, Any]]
472
 
473
-
474
  @app.get("/")
475
  async def root():
476
  return {
@@ -480,12 +442,10 @@ async def root():
480
  "features": ["PO Mapping", "Bloom's Taxonomy", "Semantic + Hybrid modes"]
481
  }
482
 
483
-
484
  @app.get("/health")
485
  async def health():
486
  return {"status": "healthy", "model_loaded": mapper is not None}
487
 
488
-
489
  @app.post("/map/semantic", response_model=MappingResponse)
490
  async def map_semantic(request: CORequest):
491
  if not request.co_text or not request.co_text.strip():
@@ -500,7 +460,6 @@ async def map_semantic(request: CORequest):
500
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
501
  )
502
 
503
-
504
  @app.post("/map/hybrid", response_model=MappingResponse)
505
  async def map_hybrid(request: CORequest):
506
  if not request.co_text or not request.co_text.strip():
@@ -515,14 +474,12 @@ async def map_hybrid(request: CORequest):
515
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
516
  )
517
 
518
-
519
  @app.post("/map/batch/semantic", response_model=BatchMappingResponse)
520
  async def map_batch_semantic(request: BatchCORequest):
521
  if not request.co_texts or len(request.co_texts) == 0:
522
  raise HTTPException(400, "At least one CO text required")
523
  if len(request.co_texts) > request.max_cos:
524
  raise HTTPException(400, f"Maximum {request.max_cos} COs allowed per batch")
525
-
526
  results = []
527
  for co_text in request.co_texts:
528
  if not co_text or not co_text.strip():
@@ -536,17 +493,14 @@ async def map_batch_semantic(request: BatchCORequest):
536
  "mappings": mappings,
537
  "bloom_prediction": bloom
538
  })
539
-
540
  return BatchMappingResponse(total_cos=len(results), method="semantic_only", results=results)
541
 
542
-
543
  @app.post("/map/batch/hybrid", response_model=BatchMappingResponse)
544
  async def map_batch_hybrid(request: BatchCORequest):
545
  if not request.co_texts or len(request.co_texts) == 0:
546
  raise HTTPException(400, "At least one CO text required")
547
  if len(request.co_texts) > request.max_cos:
548
  raise HTTPException(400, f"Maximum {request.max_cos} COs allowed per batch")
549
-
550
  results = []
551
  for co_text in request.co_texts:
552
  if not co_text or not co_text.strip():
@@ -560,5 +514,4 @@ async def map_batch_hybrid(request: BatchCORequest):
560
  "mappings": mappings,
561
  "bloom_prediction": bloom
562
  })
563
-
564
  return BatchMappingResponse(total_cos=len(results), method="hybrid", results=results)
 
11
 
12
  load_dotenv()
13
 
 
 
14
  OFFICIAL_PO_DEFINITIONS = {
15
  "PO1": "Apply the knowledge of mathematics, science, engineering fundamentals, and an engineering specialization to the solution of complex engineering problems",
16
  "PO2": "Identify, formulate, review research literature, and analyze complex engineering problems reaching substantiated conclusions using first principles of mathematics, natural sciences, and engineering sciences",
 
25
  "PO11": "Demonstrate knowledge and understanding of the engineering and management principles and apply these to one's own work, as a member and leader in a team, to manage projects and in multidisciplinary environments"
26
  }
27
 
 
28
  BLOOM_LEVEL_DEFINITIONS = {
29
  "Remember": "Recall facts, terms, basic concepts, and answers without necessarily understanding",
30
  "Understand": "Demonstrate understanding of facts and ideas by organizing, comparing, translating, interpreting",
 
34
  "Create": "Compile information together in a different way by combining elements in new patterns or proposing alternative solutions"
35
  }
36
 
 
37
  PO_KEYWORDS = {
38
  "PO1": [
39
+ "knowledge", "mathematics", "math", "science", "computing", "engineering",
40
+ "fundamental", "theory", "concept", "principle", "algorithm", "data structure",
41
+ "programming", "software", "hardware", "circuit", "system", "analysis",
42
  "understand", "explain", "apply", "technical", "computer",
43
  "physics", "chemistry", "material", "thermodynamics", "mechanics", "electricity",
44
  "waves", "optics", "equilibrium", "conservation", "kinetics", "diffusion",
45
+ "reaction rates", "calculus", "linear algebra", "differential equations",
46
+ "probability", "statistics", "regression", "interpolation", "transforms",
47
+ "numerical", "integration", "error analysis", "kinematics", "dynamics",
48
+ "fluid flow", "heat transfer", "stress", "strain", "control", "signals",
49
+ "embedded", "database", "OS", "VLSI", "power systems", "machine design",
50
+ "networking", "modelling", "compute", "derive", "formulate", "validate",
51
+ "solve", "approximation", "assumptions", "constraints", "simulation",
52
+ "optimization", "verification", "problem-solving", "quantitative",
53
+ "dimensional", "specialization", "theory-based", "unit analysis",
54
  "abstraction", "material properties", "conservation laws", "estimation",
55
  "numerical methods", "numerical integration", "circuits", "control basics",
56
  "embedded basics", "OS concepts", "data structures", "advanced tools", "codes"
57
  ],
 
58
  "PO2": [
59
+ "identify", "formulate", "analyze", "analysis", "problem", "research",
60
+ "investigate", "investigation", "examine", "evaluate", "evaluation", "assess",
61
+ "assessment", "literature", "study", "review", "complex", "assumptions",
62
+ "constraints", "requirements", "decompose", "root cause", "hypothesis",
63
+ "criteria", "metrics", "trade-offs", "case analysis", "feasibility", "risk",
64
+ "sustainability", "data gathering", "pattern recognition", "model selection",
65
+ "verification", "validation", "evidence", "argumentation", "conclusion",
66
+ "justification", "sensitivity", "sensitivity analysis", "physical principles",
67
+ "experimental", "measurement", "uncertainty", "empirical", "statistical",
68
+ "hypothesis testing", "confidence intervals", "correlation", "multivariate",
69
+ "error propagation", "failure modes", "load cases", "boundary conditions",
70
+ "specifications", "performance limits", "safety factors", "standards",
71
+ "cybersecurity", "grid stability", "bioprocess", "substantiated",
72
  "phenomena", "empirical evidence", "statistical analysis", "standards baseline",
73
  "domain constraints", "complex scenarios", "advanced methods", "first principles",
74
  "literature review", "complex problems"
75
  ],
 
76
  "PO3": [
77
+ "design", "create", "develop", "build", "implement", "implementation",
78
+ "construct", "architecture", "model", "prototype", "system", "component",
79
+ "solution", "innovative", "creative", "synthesize", "specification",
80
+ "requirements", "constraints", "whole-life cost", "net zero", "sustainability",
81
+ "public health", "safety", "stakeholder", "trade-off", "optimization",
82
+ "feasibility", "innovation", "integration", "standards", "compliance",
83
+ "documentation", "design process", "cost analysis", "environmental impact",
84
+ "resource efficiency", "life-cycle assessment", "manufacturability",
85
+ "maintainability", "codes", "project management", "plan", "propose",
86
  "integrate", "optimize", "specify", "creative solutions", "system components",
87
+ "processes", "meet needs", "appropriate consideration", "cultural",
88
  "societal", "environmental considerations"
89
  ],
 
90
  "PO4": [
91
+ "experiment", "test", "testing", "measure", "measurement", "data", "analysis",
92
+ "interpret", "interpretation", "validation", "verify", "verification",
93
  "research", "investigation", "empirical", "benchmark", "evaluate",
94
+ "experimental design", "hypothesis testing", "methodology", "data collection",
95
+ "analyze data", "validate", "model calibration", "research-based",
96
+ "literature review", "protocol", "observation", "inference", "reproducibility",
97
+ "documentation", "research methodology", "peer review", "experimental setup",
98
  "critical review", "emerging issues", "investigate", "calibrate",
99
+ "design of experiments", "synthesis", "synthesis of information",
100
  "valid conclusions", "research methods", "research-based knowledge",
101
  "interpretation of data"
102
  ],
 
103
  "PO5": [
104
+ "tool", "tools", "technology", "software", "framework", "platform", "library",
105
+ "IDE", "programming", "language", "modern", "technique", "method", "approach",
106
+ "implement", "application", "use", "utilize", "simulate", "modeling",
107
+ "calibrate", "CAD", "CAE", "algorithm", "limitations", "benchmarking",
108
+ "tool selection", "automation", "digital twin", "data visualization",
109
+ "computation", "process", "manufacturing", "deployment", "instrumentation",
110
  "digital tools", "operate", "program", "automate", "simulation",
111
  "modern engineering tools", "IT tools", "prediction", "modeling",
112
  "understanding limitations", "CAD/CAE tools", "modern techniques",
113
  "appropriate techniques", "resources", "complex engineering activities"
114
  ],
 
115
  "PO6": [
116
+ "society", "social", "environmental", "environment", "sustainability",
117
+ "sustainable", "impact", "ethical", "responsible", "responsibility",
118
+ "green", "energy", "carbon", "climate", "eco", "community", "culture",
119
+ "global", "societal impact", "legal", "health", "safety", "cultural",
120
+ "economic impacts", "standards", "compliance", "lifecycle assessment",
121
+ "regulation", "public welfare", "risk assessment", "ethics", "policy",
122
+ "public health", "safety standards", "cost-benefit", "resource allocation",
123
+ "EIA", "legal framework", "assess", "justify", "comply", "recommend",
124
  "judge", "critique", "environmental science", "reasoning", "contextual knowledge",
125
  "societal issues", "health issues", "safety issues", "legal issues",
126
  "cultural issues", "consequent responsibilities", "professional engineering practice",
127
  "engineer and society"
128
  ],
 
129
  "PO7": [
130
+ "sustainability", "environmental impact", "resource efficiency", "renewable",
131
+ "pollution", "waste management", "climate change", "conservation",
132
  "life-cycle assessment", "green design", "eco-friendly", "regulatory compliance",
133
+ "sustainable development", "carbon footprint", "circular economy",
134
+ "biodiversity", "ecosystem", "environmental degradation", "clean energy",
135
+ "water conservation", "soil conservation", "regulation", "public welfare",
136
+ "green technology", "climate policy", "renewable resources",
137
+ "ecological balance", "professional engineering solutions",
138
  "societal contexts", "environmental contexts", "knowledge of sustainable development",
139
  "need for sustainable development", "understand impact"
140
  ],
 
141
  "PO8": [
142
+ "ethics", "ethical", "professional", "integrity", "responsibility",
143
+ "responsible", "conduct", "moral", "morality", "values", "principles",
144
  "principle", "honesty", "fairness", "accountability", "code of conduct",
145
+ "inclusivity", "diversity", "compliance", "governance", "transparency",
146
+ "confidentiality", "conflict of interest", "professional ethics",
147
+ "moral values", "ethical behavior", "ethical dilemma", "social responsibility",
148
+ "professional standards", "code of conduct", "bias mitigation",
149
  "inclusive behavior", "professional responsibility", "ethical decision-making",
150
+ "professional norms", "ethical framework", "adhere", "demonstrate",
151
  "respect", "reflect", "act", "commit", "advocate", "ethical principles",
152
  "commit to ethics", "norms of engineering practice", "professional ethics and responsibilities"
153
  ],
 
154
  "PO9": [
155
+ "team", "teams", "collaborate", "collaboration", "cooperative", "cooperation",
156
+ "group", "leadership", "leader", "member", "members", "teamwork", "collective",
157
+ "peer", "diverse", "diversity", "multicultural", "together", "coordinate",
158
+ "roles", "team dynamics", "multidisciplinary", "conflict resolution",
159
+ "accountability", "contribution", "communication", "delegation", "motivation",
160
  "feedback", "participation", "project management", "multidisciplinary integration",
161
+ "cooperate", "lead", "participate", "contribute", "support", "facilitate",
162
  "manage", "team building", "interpersonal skills", "group work",
163
  "team coordination", "collaborative problem-solving", "function effectively",
164
  "individual", "member or leader", "diverse teams", "multidisciplinary settings"
165
  ],
 
166
  "PO10": [
167
+ "communicate", "communication", "present", "presentation", "document",
168
+ "documentation", "report", "write", "writing", "speak", "speaking",
169
  "explain", "articulate", "technical writing", "stakeholder", "audience",
170
+ "clarity", "visualization", "inclusivity", "language differences",
171
+ "comprehension", "oral communication", "written communication", "feedback",
172
+ "listening", "negotiation", "reporting", "audience analysis", "illustrate",
173
+ "summarize", "interpret", "discuss", "listen", "effective communication",
174
  "clear instructions", "design documentation", "presentation skills",
175
  "effective reports", "communicate effectively", "engineering community",
176
+ "society at large", "comprehend", "write effective reports",
177
  "make effective presentations", "give and receive instructions",
178
  "complex engineering activities"
179
  ],
 
180
  "PO11": [
181
+ "project", "projects", "management", "manage", "plan", "planning", "schedule",
182
+ "scheduling", "resource", "resources", "budget", "cost", "timeline", "milestone",
183
  "risk", "decision", "economic", "strategy", "organize", "organization",
184
+ "cost analysis", "resource allocation", "feasibility", "management principles",
185
+ "economics", "decision-making", "leadership", "project execution", "monitoring",
186
+ "evaluation", "procurement", "stakeholder management", "cost estimation",
187
  "budgeting", "risk management", "allocate", "execute", "coordinate",
188
+ "financial management", "project planning", "learning", "learn", "adapt",
189
+ "adapting", "adaptability", "emerging", "new", "continuous", "lifelong",
190
+ "skill", "skills", "development", "growth", "technology", "technologies",
191
  "trend", "trends", "innovation", "self-learn", "update", "evolve", "change",
192
+ "technological change", "keep abreast", "critical thinking", "reflect",
193
+ "independent learning", "update skills", "professional development", "curiosity",
194
+ "continuous improvement", "resilience", "self-learning", "independent study",
195
+ "critical review", "emerging issues", "reflection", "continuous learning",
196
+ "skill enhancement", "knowledge updating", "pursue", "explore", "inquire",
197
  "improve", "recognize need", "preparation", "ability to engage",
198
  "independent learning", "life-long learning", "broadest context",
199
  "engineering and management principles", "apply to own work",
 
202
  ]
203
  }
204
 
 
205
  class FineTunedCOPOMapper:
206
  def __init__(self):
207
  print("Loading model...")
208
  print(f"===== Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
 
 
209
  hf_token = os.environ.get("HF_TOKEN")
 
 
210
  if hf_token:
211
  print("HF_TOKEN found. Attempting to load from HuggingFace (private model)...")
212
  try:
 
216
  trust_remote_code=False
217
  )
218
  print("✓ Model loaded successfully from HuggingFace")
 
219
  except Exception as e:
220
  print(f"✗ HuggingFace load failed: {str(e)}")
221
  print("Falling back to local model...")
222
+ self._load_local_model()
223
+ else:
224
+ self._load_local_model()
225
+ self.po_embeddings = {}
226
+ self.bloom_embeddings = {}
227
+ self._precompute_embeddings()
228
+
229
+ def _load_local_model(self):
230
  model_path = os.path.join(os.path.dirname(__file__), "..", "co_po_bloom_model_v3_finetune")
231
  model_path = os.path.abspath(model_path)
 
232
  try:
233
  print(f"Attempting to load from local path: {model_path}")
234
  self.model = SentenceTransformer(
 
243
  "Please ensure either HF_TOKEN is set (for private model) or "
244
  "the model files exist in ../co_po_bloom_model_v3_finetuned/"
245
  )
246
+
 
 
 
 
247
  def _precompute_embeddings(self):
248
  for po_id, po_text in OFFICIAL_PO_DEFINITIONS.items():
249
  self.po_embeddings[po_id] = self.model.encode([po_text])[0]
250
  for level, definition in BLOOM_LEVEL_DEFINITIONS.items():
251
  self.bloom_embeddings[level] = self.model.encode([definition])[0]
252
+
253
  def _normalize_text(self, text):
254
  text = text.lower()
255
  text = re.sub(r'[^\w\s]', ' ', text)
256
  return re.sub(r'\s+', ' ', text).strip()
257
+
258
  def _calculate_keyword_score(self, co_text, po_id):
259
  co_normalized = self._normalize_text(co_text)
260
  co_words = set(co_normalized.split())
 
278
  return 0.6
279
  else:
280
  return min(1.0, matched_count / len(keywords) * 3.0)
281
+
282
  def _apply_constraints(self, results, co_text):
283
  po_scores = {r['po_id']: r['score'] for r in results}
 
284
  po_hierarchy = ['PO1', 'PO2', 'PO3', 'PO4']
285
  for i in range(len(po_hierarchy) - 1):
286
  current_po = po_hierarchy[i]
287
  next_po = po_hierarchy[i + 1]
288
  if po_scores[current_po] < po_scores[next_po]:
289
  po_scores[next_po] = po_scores[current_po]
 
290
  po7_keywords = [
291
  "sustainability", "environmental", "resource efficiency", "renewable",
292
  "pollution", "waste", "climate", "conservation", "eco", "green",
 
294
  ]
295
  co_lower = co_text.lower()
296
  po7_keyword_matches = sum(1 for keyword in po7_keywords if keyword in co_lower)
 
297
  if po7_keyword_matches >= 3:
298
  po_scores['PO7'] = 0.8
299
  elif po7_keyword_matches == 2:
 
302
  po_scores['PO7'] = 0.6
303
  else:
304
  po_scores['PO7'] = 0.4
 
305
  po11_keywords = [
306
  "project", "management", "plan", "budget", "schedule", "resource",
307
  "timeline", "milestone", "risk", "team", "coordinate", "execute"
308
  ]
309
  po11_keyword_matches = sum(1 for keyword in po11_keywords if keyword in co_lower)
 
310
  if po11_keyword_matches >= 3:
311
  po_scores['PO11'] = 0.8
312
  elif po11_keyword_matches == 2:
 
315
  po_scores['PO11'] = 0.6
316
  else:
317
  po_scores['PO11'] = 0.4
 
318
  for result in results:
319
  result['score'] = round(po_scores[result['po_id']], 3)
320
+ return sorted(results, key=lambda x: x['score'], reverse=True)
321
+
 
322
  def predict_bloom_level(self, co_text):
323
  co_embedding = self.model.encode([co_text])[0]
324
  bloom_scores = {}
325
  for level, bloom_embedding in self.bloom_embeddings.items():
326
  similarity = float(cosine_similarity([co_embedding], [bloom_embedding])[0][0])
327
  bloom_scores[level] = round(similarity, 4)
 
328
  predicted_level = max(bloom_scores.items(), key=lambda x: x[1])
 
329
  return {
330
  'predicted_level': predicted_level[0],
331
  'confidence': predicted_level[1],
332
  'all_scores': bloom_scores,
333
  'description': BLOOM_LEVEL_DEFINITIONS[predicted_level[0]]
334
  }
335
+
336
  def map_co_to_pos_semantic(self, co_text):
337
  co_embedding = self.model.encode([co_text])[0]
338
  results = []
 
357
  })
358
  results = self._apply_constraints(results, co_text)
359
  return results
360
+
361
  def map_co_to_pos_hybrid(self, co_text):
362
  co_embedding = self.model.encode([co_text])[0]
363
  results = []
 
386
  results = self._apply_constraints(results, co_text)
387
  return results
388
 
 
389
  app = FastAPI(title="CO-PO Mapping API", version="3.0.0 (with Bloom's)")
390
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
391
 
 
392
  mapper = None
393
 
 
394
  @app.on_event("startup")
395
  async def startup():
396
  global mapper
397
  mapper = FineTunedCOPOMapper()
398
 
 
399
  class CORequest(BaseModel):
400
  co_text: str
401
  include_bloom: bool = True
402
 
 
403
  class BatchCORequest(BaseModel):
404
  co_texts: List[str]
405
  include_bloom: bool = True
406
  max_cos: int = 50
407
 
 
408
  class POMapping(BaseModel):
409
  po_id: str
410
  score: float
 
415
  confidence: str
416
  method: str
417
 
 
418
  class BloomPrediction(BaseModel):
419
  predicted_level: str
420
  confidence: float
421
  all_scores: Dict[str, float]
422
  description: str
423
 
 
424
  class MappingResponse(BaseModel):
425
  co_text: str
426
  total_pos: int
 
428
  mappings: List[POMapping]
429
  bloom_prediction: Optional[BloomPrediction] = None
430
 
 
431
  class BatchMappingResponse(BaseModel):
432
  total_cos: int
433
  method: str
434
  results: List[Dict[str, Any]]
435
 
 
436
  @app.get("/")
437
  async def root():
438
  return {
 
442
  "features": ["PO Mapping", "Bloom's Taxonomy", "Semantic + Hybrid modes"]
443
  }
444
 
 
445
  @app.get("/health")
446
  async def health():
447
  return {"status": "healthy", "model_loaded": mapper is not None}
448
 
 
449
  @app.post("/map/semantic", response_model=MappingResponse)
450
  async def map_semantic(request: CORequest):
451
  if not request.co_text or not request.co_text.strip():
 
460
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
461
  )
462
 
 
463
  @app.post("/map/hybrid", response_model=MappingResponse)
464
  async def map_hybrid(request: CORequest):
465
  if not request.co_text or not request.co_text.strip():
 
474
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
475
  )
476
 
 
477
  @app.post("/map/batch/semantic", response_model=BatchMappingResponse)
478
  async def map_batch_semantic(request: BatchCORequest):
479
  if not request.co_texts or len(request.co_texts) == 0:
480
  raise HTTPException(400, "At least one CO text required")
481
  if len(request.co_texts) > request.max_cos:
482
  raise HTTPException(400, f"Maximum {request.max_cos} COs allowed per batch")
 
483
  results = []
484
  for co_text in request.co_texts:
485
  if not co_text or not co_text.strip():
 
493
  "mappings": mappings,
494
  "bloom_prediction": bloom
495
  })
 
496
  return BatchMappingResponse(total_cos=len(results), method="semantic_only", results=results)
497
 
 
498
  @app.post("/map/batch/hybrid", response_model=BatchMappingResponse)
499
  async def map_batch_hybrid(request: BatchCORequest):
500
  if not request.co_texts or len(request.co_texts) == 0:
501
  raise HTTPException(400, "At least one CO text required")
502
  if len(request.co_texts) > request.max_cos:
503
  raise HTTPException(400, f"Maximum {request.max_cos} COs allowed per batch")
 
504
  results = []
505
  for co_text in request.co_texts:
506
  if not co_text or not co_text.strip():
 
514
  "mappings": mappings,
515
  "bloom_prediction": bloom
516
  })
 
517
  return BatchMappingResponse(total_cos=len(results), method="hybrid", results=results)