MakPr016 committed on
Commit
94ff2cc
·
1 Parent(s): f25ce8b

Added post-processing

Browse files
Files changed (2) hide show
  1. .gitignore +3 -1
  2. app.py +99 -11
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
  venv**
2
  __pycache__/
3
- *.pyc
 
 
 
1
  venv**
2
  __pycache__/
3
+ *.pyc
4
+ .env
5
+ .DS_Store
app.py CHANGED
@@ -7,8 +7,12 @@ from sklearn.metrics.pairwise import cosine_similarity
7
  import os
8
  import re
9
  from datetime import datetime
 
 
 
 
 
10
 
11
- # Official PO Definitions (your complete version)
12
  OFFICIAL_PO_DEFINITIONS = {
13
  "PO1": "Apply the knowledge of mathematics, science, engineering fundamentals, and an engineering specialization to the solution of complex engineering problems",
14
  "PO2": "Identify, formulate, review research literature, and analyze complex engineering problems reaching substantiated conclusions using first principles of mathematics, natural sciences, and engineering sciences",
@@ -23,7 +27,7 @@ OFFICIAL_PO_DEFINITIONS = {
23
  "PO11": "Demonstrate knowledge and understanding of the engineering and management principles and apply these to one's own work, as a member and leader in a team, to manage projects and in multidisciplinary environments"
24
  }
25
 
26
- # Bloom's Taxonomy Definitions
27
  BLOOM_LEVEL_DEFINITIONS = {
28
  "Remember": "Recall facts, terms, basic concepts, and answers without necessarily understanding",
29
  "Understand": "Demonstrate understanding of facts and ideas by organizing, comparing, translating, interpreting",
@@ -33,7 +37,7 @@ BLOOM_LEVEL_DEFINITIONS = {
33
  "Create": "Compile information together in a different way by combining elements in new patterns or proposing alternative solutions"
34
  }
35
 
36
- # PO Keywords (your complete version)
37
  PO_KEYWORDS = {
38
  "PO1": [
39
  "knowledge", "mathematics", "math", "science", "computing", "engineering",
@@ -212,12 +216,34 @@ PO_KEYWORDS = {
212
  ]
213
  }
214
 
 
215
  class FineTunedCOPOMapper:
216
  def __init__(self):
217
- hf_token = os.environ.get("HF_TOKEN")
218
- print("Loading model from Hugging Face...")
219
- self.model = SentenceTransformer("MakPr016/co-po-bloom-model", token=hf_token)
220
- print("Model loaded successfully! (88.1% accuracy)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  self.po_embeddings = {}
222
  self.bloom_embeddings = {}
223
  self._precompute_embeddings()
@@ -257,8 +283,54 @@ class FineTunedCOPOMapper:
257
  else:
258
  return min(1.0, matched_count / len(keywords) * 3.0)
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  def predict_bloom_level(self, co_text):
261
- """Predict Bloom's taxonomy level"""
262
  co_embedding = self.model.encode([co_text])[0]
263
  bloom_scores = {}
264
  for level, bloom_embedding in self.bloom_embeddings.items():
@@ -296,7 +368,8 @@ class FineTunedCOPOMapper:
296
  'confidence': confidence,
297
  'method': 'semantic_only'
298
  })
299
- return sorted(results, key=lambda x: x['score'], reverse=True)
 
300
 
301
  def map_co_to_pos_hybrid(self, co_text):
302
  co_embedding = self.model.encode([co_text])[0]
@@ -304,7 +377,6 @@ class FineTunedCOPOMapper:
304
  for po_id, po_embedding in self.po_embeddings.items():
305
  semantic_score = float(cosine_similarity([co_embedding], [po_embedding])[0][0])
306
  keyword_score = self._calculate_keyword_score(co_text, po_id)
307
- # 80:20 ratio (semantic:keywords)
308
  final_score = (0.80 * semantic_score) + (0.20 * keyword_score)
309
  if final_score > 0.7:
310
  strength, confidence = 3, "high"
@@ -324,27 +396,34 @@ class FineTunedCOPOMapper:
324
  'confidence': confidence,
325
  'method': 'hybrid'
326
  })
327
- return sorted(results, key=lambda x: x['score'], reverse=True)
 
 
328
 
329
  app = FastAPI(title="CO-PO Mapping API", version="3.0.0 (with Bloom's)")
330
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
331
 
 
332
  mapper = None
333
 
 
334
  @app.on_event("startup")
335
  async def startup():
336
  global mapper
337
  mapper = FineTunedCOPOMapper()
338
 
 
339
  class CORequest(BaseModel):
340
  co_text: str
341
  include_bloom: bool = True
342
 
 
343
  class BatchCORequest(BaseModel):
344
  co_texts: List[str]
345
  include_bloom: bool = True
346
  max_cos: int = 50
347
 
 
348
  class POMapping(BaseModel):
349
  po_id: str
350
  score: float
@@ -355,12 +434,14 @@ class POMapping(BaseModel):
355
  confidence: str
356
  method: str
357
 
 
358
  class BloomPrediction(BaseModel):
359
  predicted_level: str
360
  confidence: float
361
  all_scores: Dict[str, float]
362
  description: str
363
 
 
364
  class MappingResponse(BaseModel):
365
  co_text: str
366
  total_pos: int
@@ -368,11 +449,13 @@ class MappingResponse(BaseModel):
368
  mappings: List[POMapping]
369
  bloom_prediction: Optional[BloomPrediction] = None
370
 
 
371
  class BatchMappingResponse(BaseModel):
372
  total_cos: int
373
  method: str
374
  results: List[Dict[str, Any]]
375
 
 
376
  @app.get("/")
377
  async def root():
378
  return {
@@ -382,10 +465,12 @@ async def root():
382
  "features": ["PO Mapping", "Bloom's Taxonomy", "Semantic + Hybrid modes"]
383
  }
384
 
 
385
  @app.get("/health")
386
  async def health():
387
  return {"status": "healthy", "model_loaded": mapper is not None}
388
 
 
389
  @app.post("/map/semantic", response_model=MappingResponse)
390
  async def map_semantic(request: CORequest):
391
  if not request.co_text or not request.co_text.strip():
@@ -400,6 +485,7 @@ async def map_semantic(request: CORequest):
400
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
401
  )
402
 
 
403
  @app.post("/map/hybrid", response_model=MappingResponse)
404
  async def map_hybrid(request: CORequest):
405
  if not request.co_text or not request.co_text.strip():
@@ -414,6 +500,7 @@ async def map_hybrid(request: CORequest):
414
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
415
  )
416
 
 
417
  @app.post("/map/batch/semantic", response_model=BatchMappingResponse)
418
  async def map_batch_semantic(request: BatchCORequest):
419
  if not request.co_texts or len(request.co_texts) == 0:
@@ -437,6 +524,7 @@ async def map_batch_semantic(request: BatchCORequest):
437
 
438
  return BatchMappingResponse(total_cos=len(results), method="semantic_only", results=results)
439
 
 
440
  @app.post("/map/batch/hybrid", response_model=BatchMappingResponse)
441
  async def map_batch_hybrid(request: BatchCORequest):
442
  if not request.co_texts or len(request.co_texts) == 0:
 
7
  import os
8
  import re
9
  from datetime import datetime
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+
15
 
 
16
  OFFICIAL_PO_DEFINITIONS = {
17
  "PO1": "Apply the knowledge of mathematics, science, engineering fundamentals, and an engineering specialization to the solution of complex engineering problems",
18
  "PO2": "Identify, formulate, review research literature, and analyze complex engineering problems reaching substantiated conclusions using first principles of mathematics, natural sciences, and engineering sciences",
 
27
  "PO11": "Demonstrate knowledge and understanding of the engineering and management principles and apply these to one's own work, as a member and leader in a team, to manage projects and in multidisciplinary environments"
28
  }
29
 
30
+
31
  BLOOM_LEVEL_DEFINITIONS = {
32
  "Remember": "Recall facts, terms, basic concepts, and answers without necessarily understanding",
33
  "Understand": "Demonstrate understanding of facts and ideas by organizing, comparing, translating, interpreting",
 
37
  "Create": "Compile information together in a different way by combining elements in new patterns or proposing alternative solutions"
38
  }
39
 
40
+
41
  PO_KEYWORDS = {
42
  "PO1": [
43
  "knowledge", "mathematics", "math", "science", "computing", "engineering",
 
216
  ]
217
  }
218
 
219
+
220
  class FineTunedCOPOMapper:
221
  def __init__(self):
222
+ print("Loading model...")
223
+
224
+ try:
225
+ self.model = SentenceTransformer(
226
+ "MakPr016/co-po-bloom-model",
227
+ local_files_only=True,
228
+ trust_remote_code=False
229
+ )
230
+ print("Model loaded from cache (Offline mode)")
231
+ except Exception as e:
232
+ print(f"Offline mode failed: {str(e)}")
233
+ print("Attempting online load...")
234
+ try:
235
+ hf_token = os.environ.get("HF_TOKEN")
236
+ if not hf_token:
237
+ raise ValueError("HF_TOKEN not set")
238
+ self.model = SentenceTransformer(
239
+ "MakPr016/co-po-bloom-model",
240
+ token=hf_token
241
+ )
242
+ print("Model loaded from HuggingFace (Online mode)")
243
+ except Exception as e2:
244
+ print(f"Online mode also failed: {str(e2)}")
245
+ raise
246
+
247
  self.po_embeddings = {}
248
  self.bloom_embeddings = {}
249
  self._precompute_embeddings()
 
283
  else:
284
  return min(1.0, matched_count / len(keywords) * 3.0)
285
 
286
def _apply_constraints(self, results, co_text):
    """Post-process PO mapping results with rule-based score adjustments.

    Three adjustments are applied, in order, to the ``score`` of each entry:

    1. Hierarchy cap: walking PO1 -> PO2 -> PO3 -> PO4, a PO's score is
       lowered to its predecessor's score whenever it exceeds it.
    2. PO7 override: the score is replaced by a fixed value chosen from the
       number of sustainability-related keywords found in ``co_text``.
    3. PO11 override: same, using project-management keywords.

    Keyword counts map to scores as 0 -> 0.4, 1 -> 0.6, 2 -> 0.7, 3+ -> 0.8.

    Args:
        results: List of mapping dicts, each with at least ``'po_id'`` and
            ``'score'`` keys. The dicts are updated in place.
        co_text: The CO text that is scanned (case-insensitively) for
            keywords.

    Returns:
        A new list holding the same dicts, sorted by ``score`` descending,
        with every score rounded to three decimals.
    """
    po_scores = {r['po_id']: r['score'] for r in results}

    # Enforce PO1 >= PO2 >= PO3 >= PO4. Pairs with a missing member are
    # skipped so a partial result list no longer raises KeyError.
    po_hierarchy = ['PO1', 'PO2', 'PO3', 'PO4']
    for current_po, next_po in zip(po_hierarchy, po_hierarchy[1:]):
        if current_po not in po_scores or next_po not in po_scores:
            continue
        if po_scores[current_po] < po_scores[next_po]:
            po_scores[next_po] = po_scores[current_po]

    keyword_overrides = {
        'PO7': [
            "sustainability", "environmental", "resource efficiency", "renewable",
            "pollution", "waste", "climate", "conservation", "eco", "green",
            "carbon", "lifecycle", "circular economy", "biodiversity",
        ],
        'PO11': [
            "project", "management", "plan", "budget", "schedule", "resource",
            "timeline", "milestone", "risk", "team", "coordinate", "execute",
        ],
    }
    # Indexed by min(match_count, 3).
    score_by_match_count = (0.4, 0.6, 0.7, 0.8)

    co_lower = co_text.lower()
    for po_id, keywords in keyword_overrides.items():
        if po_id not in po_scores:
            continue
        # Anchor each keyword at a word start: "plan" still matches
        # "planning", but no longer matches inside "explanation", nor "eco"
        # inside "second", nor "team" inside "steam".
        match_count = sum(
            1 for keyword in keywords
            if re.search(r'\b' + re.escape(keyword), co_lower)
        )
        po_scores[po_id] = score_by_match_count[min(match_count, 3)]

    # NOTE(review): only 'score' is rewritten here. Other fields the callers
    # filled in before this step (e.g. 'confidence') are left as they were;
    # confirm whether they should be re-derived from the adjusted score.
    for result in results:
        result['score'] = round(po_scores[result['po_id']], 3)

    return sorted(results, key=lambda x: x['score'], reverse=True)
332
+
333
  def predict_bloom_level(self, co_text):
 
334
  co_embedding = self.model.encode([co_text])[0]
335
  bloom_scores = {}
336
  for level, bloom_embedding in self.bloom_embeddings.items():
 
368
  'confidence': confidence,
369
  'method': 'semantic_only'
370
  })
371
+ results = self._apply_constraints(results, co_text)
372
+ return results
373
 
374
  def map_co_to_pos_hybrid(self, co_text):
375
  co_embedding = self.model.encode([co_text])[0]
 
377
  for po_id, po_embedding in self.po_embeddings.items():
378
  semantic_score = float(cosine_similarity([co_embedding], [po_embedding])[0][0])
379
  keyword_score = self._calculate_keyword_score(co_text, po_id)
 
380
  final_score = (0.80 * semantic_score) + (0.20 * keyword_score)
381
  if final_score > 0.7:
382
  strength, confidence = 3, "high"
 
396
  'confidence': confidence,
397
  'method': 'hybrid'
398
  })
399
+ results = self._apply_constraints(results, co_text)
400
+ return results
401
+
402
 
403
  app = FastAPI(title="CO-PO Mapping API", version="3.0.0 (with Bloom's)")
404
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
405
 
406
+
407
  mapper = None
408
 
409
+
410
  @app.on_event("startup")
411
  async def startup():
412
  global mapper
413
  mapper = FineTunedCOPOMapper()
414
 
415
+
416
  class CORequest(BaseModel):
417
  co_text: str
418
  include_bloom: bool = True
419
 
420
+
421
  class BatchCORequest(BaseModel):
422
  co_texts: List[str]
423
  include_bloom: bool = True
424
  max_cos: int = 50
425
 
426
+
427
  class POMapping(BaseModel):
428
  po_id: str
429
  score: float
 
434
  confidence: str
435
  method: str
436
 
437
+
438
  class BloomPrediction(BaseModel):
439
  predicted_level: str
440
  confidence: float
441
  all_scores: Dict[str, float]
442
  description: str
443
 
444
+
445
  class MappingResponse(BaseModel):
446
  co_text: str
447
  total_pos: int
 
449
  mappings: List[POMapping]
450
  bloom_prediction: Optional[BloomPrediction] = None
451
 
452
+
453
  class BatchMappingResponse(BaseModel):
454
  total_cos: int
455
  method: str
456
  results: List[Dict[str, Any]]
457
 
458
+
459
  @app.get("/")
460
  async def root():
461
  return {
 
465
  "features": ["PO Mapping", "Bloom's Taxonomy", "Semantic + Hybrid modes"]
466
  }
467
 
468
+
469
  @app.get("/health")
470
  async def health():
471
  return {"status": "healthy", "model_loaded": mapper is not None}
472
 
473
+
474
  @app.post("/map/semantic", response_model=MappingResponse)
475
  async def map_semantic(request: CORequest):
476
  if not request.co_text or not request.co_text.strip():
 
485
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
486
  )
487
 
488
+
489
  @app.post("/map/hybrid", response_model=MappingResponse)
490
  async def map_hybrid(request: CORequest):
491
  if not request.co_text or not request.co_text.strip():
 
500
  bloom_prediction=BloomPrediction(**bloom) if bloom else None
501
  )
502
 
503
+
504
  @app.post("/map/batch/semantic", response_model=BatchMappingResponse)
505
  async def map_batch_semantic(request: BatchCORequest):
506
  if not request.co_texts or len(request.co_texts) == 0:
 
524
 
525
  return BatchMappingResponse(total_cos=len(results), method="semantic_only", results=results)
526
 
527
+
528
  @app.post("/map/batch/hybrid", response_model=BatchMappingResponse)
529
  async def map_batch_hybrid(request: BatchCORequest):
530
  if not request.co_texts or len(request.co_texts) == 0: