mlbench123 committed on
Commit
a417a57
·
verified ·
1 Parent(s): 1a225c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -13
app.py CHANGED
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
9
  from typing import List, Optional, Dict, Any
10
  import json
11
  import numpy as np
 
12
  from sentence_transformers import SentenceTransformer
13
  from sklearn.metrics.pairwise import cosine_similarity
14
  import re
@@ -29,7 +30,32 @@ app.add_middleware(
29
  )
30
 
31
  # Load embedding model (cached globally)
32
- embedding_model = SentenceTransformer('./')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # ============= DATA MODELS =============
35
 
@@ -100,19 +126,23 @@ class DatabaseLoader:
100
 
101
  def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
102
  """Load JSON data files"""
103
- with open(stages_file, 'r') as f:
 
104
  self.stages = [json.loads(line) for line in f if line.strip()]
105
 
106
- with open(tasks_file, 'r') as f:
 
107
  self.tasks = [json.loads(line) for line in f if line.strip()]
108
 
109
- with open(materials_file, 'r') as f:
 
110
  self.materials = [json.loads(line) for line in f if line.strip()]
111
 
112
- with open(rooms_file, 'r') as f:
 
113
  self.rooms = [json.loads(line) for line in f if line.strip()]
114
 
115
- print(f"Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
116
  f"{len(self.materials)} materials, {len(self.rooms)} rooms")
117
 
118
  def initialize_embeddings(self):
@@ -129,7 +159,7 @@ class DatabaseLoader:
129
  material_texts = [m['material'] for m in self.materials]
130
  self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
131
 
132
- print("Embeddings ready!")
133
 
134
  # Global DB instance
135
  db = DatabaseLoader()
@@ -370,7 +400,8 @@ async def root():
370
  "service": "Construction Scope Validator",
371
  "version": "1.0.0",
372
  "status": "running",
373
- "data_loaded": len(db.stages) > 0
 
374
  }
375
 
376
  @app.get("/health")
@@ -381,7 +412,8 @@ async def health():
381
  "tasks_loaded": len(db.tasks),
382
  "materials_loaded": len(db.materials),
383
  "rooms_loaded": len(db.rooms),
384
- "embeddings_ready": db.stage_embeddings is not None
 
385
  }
386
 
387
  @app.post("/validate", response_model=ValidatedResponse)
@@ -435,8 +467,16 @@ async def match_room(room_name: str):
435
  async def startup_event():
436
  """Load data and initialize embeddings on startup"""
437
  try:
438
- # In production, load from mounted volumes or environment
439
- # For Hugging Face Spaces, put JSON files in the repo root
 
 
 
 
 
 
 
 
440
  db.load_data(
441
  stages_file='stages.json',
442
  tasks_file='tasks.json',
@@ -444,10 +484,15 @@ async def startup_event():
444
  rooms_file='rooms.json'
445
  )
446
  db.initialize_embeddings()
447
- print("✅ Service ready!")
 
 
 
448
  except Exception as e:
449
- print(f"❌ Startup error: {e}")
450
  print("Make sure JSON files are in the correct location")
 
 
451
 
452
  if __name__ == "__main__":
453
  import uvicorn
 
9
  from typing import List, Optional, Dict, Any
10
  import json
11
  import numpy as np
12
+ import os
13
  from sentence_transformers import SentenceTransformer
14
  from sklearn.metrics.pairwise import cosine_similarity
15
  import re
 
30
  )
31
 
32
  # Load embedding model (cached globally)
33
+ # Try to load trained model from root, fallback to base model
34
+ print("="*60)
35
+ print("LOADING MODEL...")
36
+ print("="*60)
37
+
38
+ try:
39
+ # Check if trained model files exist in root
40
+ model_files = ['config.json', 'pytorch_model.bin', 'sentence_bert_config.json']
41
+ has_model = all(os.path.exists(f) for f in model_files)
42
+
43
+ if has_model:
44
+ print("✓ Trained model files found in root directory")
45
+ print("Loading trained model...")
46
+ embedding_model = SentenceTransformer('./', device='cpu')
47
+ print("✅ Trained model loaded successfully!")
48
+ else:
49
+ print("⚠️ Trained model not found, using base model...")
50
+ embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
51
+ print("✅ Base model loaded successfully!")
52
+ except Exception as e:
53
+ print(f"❌ Error loading trained model: {e}")
54
+ print("Falling back to base model...")
55
+ embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
56
+ print("✅ Base model loaded successfully!")
57
+
58
+ print("="*60)
59
 
60
  # ============= DATA MODELS =============
61
 
 
126
 
127
  def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
128
  """Load JSON data files"""
129
+ print(f"Loading {stages_file}...")
130
+ with open(stages_file, 'r', encoding='utf-8') as f:
131
  self.stages = [json.loads(line) for line in f if line.strip()]
132
 
133
+ print(f"Loading {tasks_file}...")
134
+ with open(tasks_file, 'r', encoding='utf-8') as f:
135
  self.tasks = [json.loads(line) for line in f if line.strip()]
136
 
137
+ print(f"Loading {materials_file}...")
138
+ with open(materials_file, 'r', encoding='utf-8') as f:
139
  self.materials = [json.loads(line) for line in f if line.strip()]
140
 
141
+ print(f"Loading {rooms_file}...")
142
+ with open(rooms_file, 'r', encoding='utf-8') as f:
143
  self.rooms = [json.loads(line) for line in f if line.strip()]
144
 
145
+ print(f"✅ Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
146
  f"{len(self.materials)} materials, {len(self.rooms)} rooms")
147
 
148
  def initialize_embeddings(self):
 
159
  material_texts = [m['material'] for m in self.materials]
160
  self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
161
 
162
+ print("✅ Embeddings ready!")
163
 
164
  # Global DB instance
165
  db = DatabaseLoader()
 
400
  "service": "Construction Scope Validator",
401
  "version": "1.0.0",
402
  "status": "running",
403
+ "data_loaded": len(db.stages) > 0,
404
+ "model_type": "trained" if os.path.exists('pytorch_model.bin') else "base"
405
  }
406
 
407
  @app.get("/health")
 
412
  "tasks_loaded": len(db.tasks),
413
  "materials_loaded": len(db.materials),
414
  "rooms_loaded": len(db.rooms),
415
+ "embeddings_ready": db.stage_embeddings is not None,
416
+ "model_type": "trained" if os.path.exists('pytorch_model.bin') else "base"
417
  }
418
 
419
  @app.post("/validate", response_model=ValidatedResponse)
 
467
  async def startup_event():
468
  """Load data and initialize embeddings on startup"""
469
  try:
470
+ print("\n" + "="*60)
471
+ print("STARTING UP...")
472
+ print("="*60)
473
+
474
+ # Check what files are available
475
+ print("\nFiles in root directory:")
476
+ for file in os.listdir('.'):
477
+ print(f" - {file}")
478
+
479
+ # Load data
480
  db.load_data(
481
  stages_file='stages.json',
482
  tasks_file='tasks.json',
 
484
  rooms_file='rooms.json'
485
  )
486
  db.initialize_embeddings()
487
+
488
+ print("\n" + "="*60)
489
+ print("✅ SERVICE READY!")
490
+ print("="*60)
491
  except Exception as e:
492
+ print(f"\n❌ STARTUP ERROR: {e}")
493
  print("Make sure JSON files are in the correct location")
494
+ import traceback
495
+ traceback.print_exc()
496
 
497
  if __name__ == "__main__":
498
  import uvicorn