mlbench123 committed on
Commit
170c4b9
·
verified ·
1 Parent(s): ee0043d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +520 -21
app.py CHANGED
@@ -1,6 +1,7 @@
 
1
  """
2
  FastAPI Service for Construction Scope Validation
3
- Deploy on Hugging Face Spaces
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
@@ -9,6 +10,7 @@ from typing import List, Optional, Dict, Any
9
  import json
10
  import numpy as np
11
  import os
 
12
  from sentence_transformers import SentenceTransformer
13
  from sklearn.metrics.pairwise import cosine_similarity
14
  import re
@@ -28,22 +30,57 @@ app.add_middleware(
28
  allow_headers=["*"],
29
  )
30
 
31
- # Load embedding model (cached globally)
32
  print("="*60)
33
  print("LOADING MODEL...")
34
  print("="*60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  try:
36
  model_files = ['config.json', 'sentence_bert_config.json']
37
  has_weights = os.path.exists('pytorch_model.bin') or os.path.exists('model.safetensors')
38
  has_model = all(os.path.exists(f) for f in model_files) and has_weights
39
 
40
  if has_model:
41
- print("✓ Trained model files found in root directory")
42
  print("Loading trained model...")
43
  embedding_model = SentenceTransformer('./', device='cpu')
44
  print("✅ Trained model loaded successfully!")
45
  else:
46
- print("⚠️ Trained model not found, using base model...")
47
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
48
  print("✅ Base model loaded successfully!")
49
  except Exception as e:
@@ -109,18 +146,13 @@ class ValidatedResponse(BaseModel):
109
 
110
  # ============= HELPER FUNCTION =============
111
  def parse_room_area(room_area_value):
112
- """
113
- Parse roomArea field which might be a string, list, or None
114
- Returns a proper list of strings
115
- """
116
  if room_area_value is None:
117
  return []
118
 
119
- # If it's already a list, return it
120
  if isinstance(room_area_value, list):
121
  return room_area_value
122
 
123
- # If it's a string, try to parse it as JSON
124
  if isinstance(room_area_value, str):
125
  try:
126
  parsed = json.loads(room_area_value)
@@ -128,10 +160,8 @@ def parse_room_area(room_area_value):
128
  return parsed
129
  return [str(parsed)]
130
  except json.JSONDecodeError:
131
- # If JSON parsing fails, treat it as a single item
132
  return [room_area_value]
133
 
134
- # Fallback: convert to string and wrap in list
135
  return [str(room_area_value)]
136
 
137
  # ============= DATABASE LOADERS =============
@@ -201,12 +231,10 @@ def find_best_room(llm_area: str, threshold: float = 0.6) -> tuple:
201
  """Find closest matching room from DB"""
202
  llm_area_lower = llm_area.lower()
203
 
204
- # Exact match first
205
  for room in db.rooms:
206
  if room['name'].lower() == llm_area_lower:
207
  return room, 1.0
208
 
209
- # Fuzzy match
210
  room_texts = [r['name'] for r in db.rooms]
211
  query_embedding = embedding_model.encode([llm_area])
212
  room_embeddings = embedding_model.encode(room_texts)
@@ -326,14 +354,13 @@ def validate_scope(llm_scope: LLMScopeRequest) -> ValidatedResponse:
326
  for m, score in material_matches
327
  ]
328
 
329
- # FIX: Parse roomArea properly
330
  validated_task = ValidatedTask(
331
  taskId=best_task['taskId'],
332
  task=best_task['task'],
333
  displayName=best_task['displayName'],
334
  unit=best_task['unit'],
335
  stageId=best_task['stageId'],
336
- roomArea=parse_room_area(best_task['roomArea']), # <-- FIXED HERE
337
  confidence_score=round(task_confidence, 2),
338
  recommended_materials=validated_materials
339
  )
@@ -407,10 +434,7 @@ async def health():
407
 
408
  @app.post("/validate", response_model=ValidatedResponse)
409
  async def validate_scope_endpoint(request: LLMScopeRequest):
410
- """
411
- Validate LLM-generated scope against database
412
- Returns enriched data with matched stages, tasks, materials, and confidence scores
413
- """
414
  try:
415
  if not db.stages:
416
  raise HTTPException(status_code=500, detail="Database not loaded")
@@ -467,13 +491,488 @@ async def startup_event():
467
  print("="*60)
468
  except Exception as e:
469
  print(f"\n❌ STARTUP ERROR: {e}")
470
- print("Make sure JSON files are in the correct location")
471
  import traceback
472
  traceback.print_exc()
473
 
474
  if __name__ == "__main__":
475
  import uvicorn
476
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  # """
479
  # FastAPI Service for Construction Scope Validation
 
1
+
2
  """
3
  FastAPI Service for Construction Scope Validation
4
+ Deploy on Hugging Face Spaces - Flattened File Structure
5
  """
6
  from fastapi import FastAPI, HTTPException
7
  from fastapi.middleware.cors import CORSMiddleware
 
10
  import json
11
  import numpy as np
12
  import os
13
+ import shutil
14
  from sentence_transformers import SentenceTransformer
15
  from sklearn.metrics.pairwise import cosine_similarity
16
  import re
 
30
  allow_headers=["*"],
31
  )
32
 
33
+ # ============= MODEL LOADING WITH FLAT STRUCTURE =============
34
  print("="*60)
35
  print("LOADING MODEL...")
36
  print("="*60)
37
+
38
def setup_model_structure():
    """
    Ensure the 1_Pooling/ and 2_Normalize/ config files exist in the
    current working directory.

    The model files are expected to sit flattened in the root directory, so
    the sub-folder configs are generated here before the model is loaded
    from './'.

    Each config file is checked and written independently: a file that is
    already present is never overwritten (so a real, trained pooling config
    survives), and a file that is missing is created even when its
    directory already exists.

    Returns:
        None. Side effects only: may create directories/files relative to
        the current working directory and prints progress messages.
    """
    # NOTE(review): 384 presumably matches the embedding width of the
    # all-MiniLM-L6-v2 base model used as fallback elsewhere in this file;
    # confirm it also matches the trained model's config.json.
    pooling_config = {
        "word_embedding_dimension": 384,
        "pooling_mode_cls_token": False,
        "pooling_mode_mean_tokens": True,
        "pooling_mode_max_tokens": False,
        "pooling_mode_mean_sqrt_len_tokens": False
    }

    # (relative path, JSON payload, json.dump indent) for every required file.
    # The Normalize config is intentionally an empty object.
    required_files = (
        ('1_Pooling/config.json', pooling_config, 2),
        ('2_Normalize/config.json', {}, None),
    )

    missing = [spec for spec in required_files if not os.path.exists(spec[0])]
    if not missing:
        return

    print("Creating temporary model structure...")

    for path, payload, indent in missing:
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Explicit encoding keeps the output independent of the platform locale.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=indent)
        print(f"✓ Created {path}")
68
+
69
+ # Setup structure before loading model
70
+ setup_model_structure()
71
+
72
  try:
73
  model_files = ['config.json', 'sentence_bert_config.json']
74
  has_weights = os.path.exists('pytorch_model.bin') or os.path.exists('model.safetensors')
75
  has_model = all(os.path.exists(f) for f in model_files) and has_weights
76
 
77
  if has_model:
78
+ print("✓ Model files found in root directory")
79
  print("Loading trained model...")
80
  embedding_model = SentenceTransformer('./', device='cpu')
81
  print("✅ Trained model loaded successfully!")
82
  else:
83
+ print("⚠️ Model not found, using base model...")
84
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
85
  print("✅ Base model loaded successfully!")
86
  except Exception as e:
 
146
 
147
  # ============= HELPER FUNCTION =============
148
  def parse_room_area(room_area_value):
149
+ """Parse roomArea field which might be a string, list, or None"""
 
 
 
150
  if room_area_value is None:
151
  return []
152
 
 
153
  if isinstance(room_area_value, list):
154
  return room_area_value
155
 
 
156
  if isinstance(room_area_value, str):
157
  try:
158
  parsed = json.loads(room_area_value)
 
160
  return parsed
161
  return [str(parsed)]
162
  except json.JSONDecodeError:
 
163
  return [room_area_value]
164
 
 
165
  return [str(room_area_value)]
166
 
167
  # ============= DATABASE LOADERS =============
 
231
  """Find closest matching room from DB"""
232
  llm_area_lower = llm_area.lower()
233
 
 
234
  for room in db.rooms:
235
  if room['name'].lower() == llm_area_lower:
236
  return room, 1.0
237
 
 
238
  room_texts = [r['name'] for r in db.rooms]
239
  query_embedding = embedding_model.encode([llm_area])
240
  room_embeddings = embedding_model.encode(room_texts)
 
354
  for m, score in material_matches
355
  ]
356
 
 
357
  validated_task = ValidatedTask(
358
  taskId=best_task['taskId'],
359
  task=best_task['task'],
360
  displayName=best_task['displayName'],
361
  unit=best_task['unit'],
362
  stageId=best_task['stageId'],
363
+ roomArea=parse_room_area(best_task['roomArea']),
364
  confidence_score=round(task_confidence, 2),
365
  recommended_materials=validated_materials
366
  )
 
434
 
435
  @app.post("/validate", response_model=ValidatedResponse)
436
  async def validate_scope_endpoint(request: LLMScopeRequest):
437
+ """Validate LLM-generated scope against database"""
 
 
 
438
  try:
439
  if not db.stages:
440
  raise HTTPException(status_code=500, detail="Database not loaded")
 
491
  print("="*60)
492
  except Exception as e:
493
  print(f"\n❌ STARTUP ERROR: {e}")
 
494
  import traceback
495
  traceback.print_exc()
496
 
497
  if __name__ == "__main__":
498
  import uvicorn
499
  uvicorn.run(app, host="0.0.0.0", port=7860)
500
+ # """
501
+ # FastAPI Service for Construction Scope Validation
502
+ # Deploy on Hugging Face Spaces
503
+ # """
504
+ # from fastapi import FastAPI, HTTPException
505
+ # from fastapi.middleware.cors import CORSMiddleware
506
+ # from pydantic import BaseModel, Field
507
+ # from typing import List, Optional, Dict, Any
508
+ # import json
509
+ # import numpy as np
510
+ # import os
511
+ # from sentence_transformers import SentenceTransformer
512
+ # from sklearn.metrics.pairwise import cosine_similarity
513
+ # import re
514
+
515
+ # app = FastAPI(
516
+ # title="Construction Scope Validator API",
517
+ # description="Validates and enriches LLM-generated construction scope with DB data",
518
+ # version="1.0.0"
519
+ # )
520
+
521
+ # # CORS middleware
522
+ # app.add_middleware(
523
+ # CORSMiddleware,
524
+ # allow_origins=["*"],
525
+ # allow_credentials=True,
526
+ # allow_methods=["*"],
527
+ # allow_headers=["*"],
528
+ # )
529
+
530
+ # # Load embedding model (cached globally)
531
+ # print("="*60)
532
+ # print("LOADING MODEL...")
533
+ # print("="*60)
534
+ # try:
535
+ # model_files = ['config.json', 'sentence_bert_config.json']
536
+ # has_weights = os.path.exists('pytorch_model.bin') or os.path.exists('model.safetensors')
537
+ # has_model = all(os.path.exists(f) for f in model_files) and has_weights
538
+
539
+ # if has_model:
540
+ # print("✓ Trained model files found in root directory")
541
+ # print("Loading trained model...")
542
+ # embedding_model = SentenceTransformer('./', device='cpu')
543
+ # print("✅ Trained model loaded successfully!")
544
+ # else:
545
+ # print("⚠️ Trained model not found, using base model...")
546
+ # embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
547
+ # print("✅ Base model loaded successfully!")
548
+ # except Exception as e:
549
+ # print(f"❌ Error loading trained model: {e}")
550
+ # print("Falling back to base model...")
551
+ # embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
552
+ # print("✅ Base model loaded successfully!")
553
+ # print("="*60)
554
+
555
+ # # ============= DATA MODELS =============
556
+ # class LLMScopeItem(BaseModel):
557
+ # stage: str
558
+ # task: str
559
+ # material: str
560
+ # quantity: float
561
+ # unit: str
562
+
563
+ # class LLMAreaScope(BaseModel):
564
+ # area: str
565
+ # items: List[LLMScopeItem]
566
+
567
+ # class LLMScopeRequest(BaseModel):
568
+ # scope_of_work: List[LLMAreaScope]
569
+
570
+ # class ValidatedMaterial(BaseModel):
571
+ # materialId: int
572
+ # name: str
573
+ # material: str
574
+ # unit: str
575
+ # price: float
576
+ # margin: float
577
+ # categories: List[str]
578
+ # confidence_score: float
579
+
580
+ # class ValidatedTask(BaseModel):
581
+ # taskId: int
582
+ # task: str
583
+ # displayName: str
584
+ # unit: str
585
+ # stageId: int
586
+ # roomArea: List[str]
587
+ # confidence_score: float
588
+ # recommended_materials: List[ValidatedMaterial]
589
+
590
+ # class ValidatedStage(BaseModel):
591
+ # stageId: int
592
+ # stage: str
593
+ # priority: int
594
+ # confidence_score: float
595
+ # tasks: List[ValidatedTask]
596
+
597
+ # class ValidatedArea(BaseModel):
598
+ # roomId: Optional[int]
599
+ # name: str
600
+ # roomType: str
601
+ # matched: bool
602
+ # confidence_score: float
603
+ # stages: List[ValidatedStage]
604
+
605
+ # class ValidatedResponse(BaseModel):
606
+ # areas: List[ValidatedArea]
607
+ # summary: Dict[str, Any]
608
+
609
+ # # ============= HELPER FUNCTION =============
610
+ # def parse_room_area(room_area_value):
611
+ # """
612
+ # Parse roomArea field which might be a string, list, or None
613
+ # Returns a proper list of strings
614
+ # """
615
+ # if room_area_value is None:
616
+ # return []
617
+
618
+ # # If it's already a list, return it
619
+ # if isinstance(room_area_value, list):
620
+ # return room_area_value
621
+
622
+ # # If it's a string, try to parse it as JSON
623
+ # if isinstance(room_area_value, str):
624
+ # try:
625
+ # parsed = json.loads(room_area_value)
626
+ # if isinstance(parsed, list):
627
+ # return parsed
628
+ # return [str(parsed)]
629
+ # except json.JSONDecodeError:
630
+ # # If JSON parsing fails, treat it as a single item
631
+ # return [room_area_value]
632
+
633
+ # # Fallback: convert to string and wrap in list
634
+ # return [str(room_area_value)]
635
+
636
+ # # ============= DATABASE LOADERS =============
637
+ # class DatabaseLoader:
638
+ # def __init__(self):
639
+ # self.stages = []
640
+ # self.tasks = []
641
+ # self.materials = []
642
+ # self.rooms = []
643
+ # self.stage_embeddings = None
644
+ # self.task_embeddings = None
645
+ # self.material_embeddings = None
646
+
647
+ # def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
648
+ # """Load JSON data files"""
649
+ # print(f"Loading {stages_file}...")
650
+ # with open(stages_file, 'r', encoding='utf-8') as f:
651
+ # self.stages = [json.loads(line) for line in f if line.strip()]
652
+
653
+ # print(f"Loading {tasks_file}...")
654
+ # with open(tasks_file, 'r', encoding='utf-8') as f:
655
+ # self.tasks = [json.loads(line) for line in f if line.strip()]
656
+
657
+ # print(f"Loading {materials_file}...")
658
+ # with open(materials_file, 'r', encoding='utf-8') as f:
659
+ # self.materials = [json.loads(line) for line in f if line.strip()]
660
+
661
+ # print(f"Loading {rooms_file}...")
662
+ # with open(rooms_file, 'r', encoding='utf-8') as f:
663
+ # self.rooms = [json.loads(line) for line in f if line.strip()]
664
+
665
+ # print(f"✅ Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
666
+ # f"{len(self.materials)} materials, {len(self.rooms)} rooms")
667
+
668
+ # def initialize_embeddings(self):
669
+ # """Pre-compute embeddings for fast lookup"""
670
+ # print("Computing stage embeddings...")
671
+ # stage_texts = [s['stage'] for s in self.stages]
672
+ # self.stage_embeddings = embedding_model.encode(stage_texts, show_progress_bar=True)
673
+
674
+ # print("Computing task embeddings...")
675
+ # task_texts = [t['task'] for t in self.tasks]
676
+ # self.task_embeddings = embedding_model.encode(task_texts, show_progress_bar=True)
677
+
678
+ # print("Computing material embeddings...")
679
+ # material_texts = [m['material'] for m in self.materials]
680
+ # self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
681
+
682
+ # print("✅ Embeddings ready!")
683
+
684
+ # # Global DB instance
685
+ # db = DatabaseLoader()
686
+
687
+ # # ============= MATCHING FUNCTIONS =============
688
+ # def find_best_stage(llm_stage: str, threshold: float = 0.5) -> tuple:
689
+ # """Find closest matching stage from DB"""
690
+ # query_embedding = embedding_model.encode([llm_stage])
691
+ # similarities = cosine_similarity(query_embedding, db.stage_embeddings)[0]
692
+ # best_idx = np.argmax(similarities)
693
+ # best_score = similarities[best_idx]
694
+
695
+ # if best_score >= threshold:
696
+ # return db.stages[best_idx], best_score
697
+ # return None, 0.0
698
+
699
+ # def find_best_room(llm_area: str, threshold: float = 0.6) -> tuple:
700
+ # """Find closest matching room from DB"""
701
+ # llm_area_lower = llm_area.lower()
702
+
703
+ # # Exact match first
704
+ # for room in db.rooms:
705
+ # if room['name'].lower() == llm_area_lower:
706
+ # return room, 1.0
707
+
708
+ # # Fuzzy match
709
+ # room_texts = [r['name'] for r in db.rooms]
710
+ # query_embedding = embedding_model.encode([llm_area])
711
+ # room_embeddings = embedding_model.encode(room_texts)
712
+ # similarities = cosine_similarity(query_embedding, room_embeddings)[0]
713
+
714
+ # best_idx = np.argmax(similarities)
715
+ # best_score = similarities[best_idx]
716
+
717
+ # if best_score >= threshold:
718
+ # return db.rooms[best_idx], best_score
719
+ # return None, 0.0
720
+
721
+ # def find_tasks_for_stage(stage_id: int, llm_task: str, top_k: int = 5) -> List[tuple]:
722
+ # """Find relevant tasks for a stage matching LLM task description"""
723
+ # stage_tasks = [t for t in db.tasks if t['stageId'] == stage_id]
724
+ # if not stage_tasks:
725
+ # return []
726
+
727
+ # task_indices = [db.tasks.index(t) for t in stage_tasks]
728
+ # query_embedding = embedding_model.encode([llm_task])
729
+ # stage_task_embeddings = db.task_embeddings[task_indices]
730
+ # similarities = cosine_similarity(query_embedding, stage_task_embeddings)[0]
731
+
732
+ # top_indices = np.argsort(similarities)[-top_k:][::-1]
733
+ # results = [(stage_tasks[idx], similarities[idx]) for idx in top_indices]
734
+ # return results
735
+
736
+ # def extract_keywords(text: str) -> List[str]:
737
+ # """Extract meaningful keywords from text"""
738
+ # stop_words = {'and', 'or', 'the', 'to', 'a', 'of', 'for', 'in', 'on', 'supply', 'install'}
739
+ # words = re.findall(r'\b\w+\b', text.lower())
740
+ # return [w for w in words if w not in stop_words and len(w) > 2]
741
+
742
+ # def find_materials_for_task(task: dict, llm_material: str, unit: str, top_k: int = 10) -> List[tuple]:
743
+ # """Find materials matching task requirements"""
744
+ # task_keywords = extract_keywords(task['task'])
745
+ # llm_keywords = extract_keywords(llm_material)
746
+ # all_keywords = set(task_keywords + llm_keywords)
747
+
748
+ # compatible_materials = [
749
+ # m for m in db.materials
750
+ # if m['unit'] == unit or m['unit'] == 'unit' or m['unit'] is None
751
+ # ]
752
+ # if not compatible_materials:
753
+ # compatible_materials = db.materials
754
+
755
+ # scored_materials = []
756
+ # for material in compatible_materials:
757
+ # score = 0.0
758
+ # material_text = material['material'].lower()
759
+
760
+ # for keyword in all_keywords:
761
+ # if keyword in material_text:
762
+ # score += 2.0
763
+
764
+ # categories_str = ' '.join(material.get('categories', [])).lower()
765
+ # for keyword in all_keywords:
766
+ # if keyword in categories_str:
767
+ # score += 1.0
768
+
769
+ # material_idx = db.materials.index(material)
770
+ # query_embedding = embedding_model.encode([llm_material])
771
+ # material_embedding = db.material_embeddings[material_idx].reshape(1, -1)
772
+ # semantic_score = cosine_similarity(query_embedding, material_embedding)[0][0]
773
+ # score += semantic_score * 5.0
774
+
775
+ # if score > 0:
776
+ # scored_materials.append((material, score))
777
+
778
+ # scored_materials.sort(key=lambda x: x[1], reverse=True)
779
+ # return scored_materials[:top_k]
780
+
781
+ # # ============= VALIDATION PIPELINE =============
782
+ # def validate_scope(llm_scope: LLMScopeRequest) -> ValidatedResponse:
783
+ # """Main validation pipeline"""
784
+ # validated_areas = []
785
+
786
+ # for area_scope in llm_scope.scope_of_work:
787
+ # matched_room, room_confidence = find_best_room(area_scope.area)
788
+ # validated_stages_dict = {}
789
+
790
+ # for item in area_scope.items:
791
+ # matched_stage, stage_confidence = find_best_stage(item.stage)
792
+ # if not matched_stage:
793
+ # continue
794
+
795
+ # stage_id = matched_stage['stageId']
796
+
797
+ # if stage_id not in validated_stages_dict:
798
+ # validated_stages_dict[stage_id] = {
799
+ # 'stage_data': matched_stage,
800
+ # 'confidence': stage_confidence,
801
+ # 'tasks': []
802
+ # }
803
+
804
+ # task_matches = find_tasks_for_stage(stage_id, item.task, top_k=3)
805
+ # if not task_matches:
806
+ # continue
807
+
808
+ # best_task, task_confidence = task_matches[0]
809
+
810
+ # material_matches = find_materials_for_task(
811
+ # best_task, item.material, item.unit, top_k=5
812
+ # )
813
+
814
+ # validated_materials = [
815
+ # ValidatedMaterial(
816
+ # materialId=m['materialId'],
817
+ # name=m['name'],
818
+ # material=m['material'],
819
+ # unit=m['unit'] or 'unit',
820
+ # price=float(m['price']),
821
+ # margin=float(m['margin']),
822
+ # categories=m['categories'],
823
+ # confidence_score=round(score / 10.0, 2)
824
+ # )
825
+ # for m, score in material_matches
826
+ # ]
827
+
828
+ # # FIX: Parse roomArea properly
829
+ # validated_task = ValidatedTask(
830
+ # taskId=best_task['taskId'],
831
+ # task=best_task['task'],
832
+ # displayName=best_task['displayName'],
833
+ # unit=best_task['unit'],
834
+ # stageId=best_task['stageId'],
835
+ # roomArea=parse_room_area(best_task['roomArea']), # <-- FIXED HERE
836
+ # confidence_score=round(task_confidence, 2),
837
+ # recommended_materials=validated_materials
838
+ # )
839
+
840
+ # validated_stages_dict[stage_id]['tasks'].append(validated_task)
841
+
842
+ # validated_stages = [
843
+ # ValidatedStage(
844
+ # stageId=stage_data['stage_data']['stageId'],
845
+ # stage=stage_data['stage_data']['stage'],
846
+ # priority=stage_data['stage_data']['priority'],
847
+ # confidence_score=round(stage_data['confidence'], 2),
848
+ # tasks=stage_data['tasks']
849
+ # )
850
+ # for stage_data in validated_stages_dict.values()
851
+ # ]
852
+
853
+ # validated_stages.sort(key=lambda x: x.priority)
854
+
855
+ # validated_area = ValidatedArea(
856
+ # roomId=matched_room['id'] if matched_room else None,
857
+ # name=matched_room['name'] if matched_room else area_scope.area,
858
+ # roomType=matched_room['roomType'] if matched_room else 'unknown',
859
+ # matched=matched_room is not None,
860
+ # confidence_score=round(room_confidence, 2),
861
+ # stages=validated_stages
862
+ # )
863
+
864
+ # validated_areas.append(validated_area)
865
+
866
+ # summary = {
867
+ # 'total_areas': len(validated_areas),
868
+ # 'total_stages': sum(len(a.stages) for a in validated_areas),
869
+ # 'total_tasks': sum(len(s.tasks) for a in validated_areas for s in a.stages),
870
+ # 'total_materials': sum(
871
+ # len(t.recommended_materials)
872
+ # for a in validated_areas
873
+ # for s in a.stages
874
+ # for t in s.tasks
875
+ # ),
876
+ # 'matched_areas': sum(1 for a in validated_areas if a.matched),
877
+ # 'avg_confidence': round(
878
+ # np.mean([a.confidence_score for a in validated_areas]), 2
879
+ # ) if validated_areas else 0.0
880
+ # }
881
+
882
+ # return ValidatedResponse(areas=validated_areas, summary=summary)
883
+
884
+ # # ============= API ENDPOINTS =============
885
+ # @app.get("/")
886
+ # async def root():
887
+ # return {
888
+ # "service": "Construction Scope Validator",
889
+ # "version": "1.0.0",
890
+ # "status": "running",
891
+ # "data_loaded": len(db.stages) > 0,
892
+ # "model_type": "trained" if os.path.exists('model.safetensors') else "base"
893
+ # }
894
+
895
+ # @app.get("/health")
896
+ # async def health():
897
+ # return {
898
+ # "status": "healthy",
899
+ # "stages_loaded": len(db.stages),
900
+ # "tasks_loaded": len(db.tasks),
901
+ # "materials_loaded": len(db.materials),
902
+ # "rooms_loaded": len(db.rooms),
903
+ # "embeddings_ready": db.stage_embeddings is not None,
904
+ # "model_type": "trained" if os.path.exists('model.safetensors') else "base"
905
+ # }
906
+
907
+ # @app.post("/validate", response_model=ValidatedResponse)
908
+ # async def validate_scope_endpoint(request: LLMScopeRequest):
909
+ # """
910
+ # Validate LLM-generated scope against database
911
+ # Returns enriched data with matched stages, tasks, materials, and confidence scores
912
+ # """
913
+ # try:
914
+ # if not db.stages:
915
+ # raise HTTPException(status_code=500, detail="Database not loaded")
916
+ # result = validate_scope(request)
917
+ # return result
918
+ # except Exception as e:
919
+ # import traceback
920
+ # error_detail = f"Validation error: {str(e)}\n{traceback.format_exc()}"
921
+ # raise HTTPException(status_code=500, detail=error_detail)
922
+
923
+ # @app.post("/match-stage")
924
+ # async def match_stage(stage_name: str):
925
+ # """Test endpoint: match a single stage name"""
926
+ # matched_stage, confidence = find_best_stage(stage_name)
927
+ # if matched_stage:
928
+ # return {
929
+ # "input": stage_name,
930
+ # "matched": matched_stage,
931
+ # "confidence": round(confidence, 2)
932
+ # }
933
+ # return {"input": stage_name, "matched": None, "confidence": 0.0}
934
+
935
+ # @app.post("/match-room")
936
+ # async def match_room(room_name: str):
937
+ # """Test endpoint: match a single room name"""
938
+ # matched_room, confidence = find_best_room(room_name)
939
+ # if matched_room:
940
+ # return {
941
+ # "input": room_name,
942
+ # "matched": matched_room,
943
+ # "confidence": round(confidence, 2)
944
+ # }
945
+ # return {"input": room_name, "matched": None, "confidence": 0.0}
946
+
947
+ # # ============= STARTUP =============
948
+ # @app.on_event("startup")
949
+ # async def startup_event():
950
+ # """Load data and initialize embeddings on startup"""
951
+ # try:
952
+ # print("\n" + "="*60)
953
+ # print("STARTING UP...")
954
+ # print("="*60)
955
+
956
+ # db.load_data(
957
+ # stages_file='stages.json',
958
+ # tasks_file='tasks.json',
959
+ # materials_file='materials.json',
960
+ # rooms_file='rooms.json'
961
+ # )
962
+ # db.initialize_embeddings()
963
+
964
+ # print("\n" + "="*60)
965
+ # print("✅ SERVICE READY!")
966
+ # print("="*60)
967
+ # except Exception as e:
968
+ # print(f"\n❌ STARTUP ERROR: {e}")
969
+ # print("Make sure JSON files are in the correct location")
970
+ # import traceback
971
+ # traceback.print_exc()
972
+
973
+ # if __name__ == "__main__":
974
+ # import uvicorn
975
+ # uvicorn.run(app, host="0.0.0.0", port=7860)
976
 
977
  # """
978
  # FastAPI Service for Construction Scope Validation