Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
|
|
| 9 |
from typing import List, Optional, Dict, Any
|
| 10 |
import json
|
| 11 |
import numpy as np
|
|
|
|
| 12 |
from sentence_transformers import SentenceTransformer
|
| 13 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 14 |
import re
|
|
@@ -29,7 +30,32 @@ app.add_middleware(
|
|
| 29 |
)
|
| 30 |
|
| 31 |
# Load embedding model (cached globally)
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# ============= DATA MODELS =============
|
| 35 |
|
|
@@ -100,19 +126,23 @@ class DatabaseLoader:
|
|
| 100 |
|
| 101 |
def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
|
| 102 |
"""Load JSON data files"""
|
| 103 |
-
|
|
|
|
| 104 |
self.stages = [json.loads(line) for line in f if line.strip()]
|
| 105 |
|
| 106 |
-
|
|
|
|
| 107 |
self.tasks = [json.loads(line) for line in f if line.strip()]
|
| 108 |
|
| 109 |
-
|
|
|
|
| 110 |
self.materials = [json.loads(line) for line in f if line.strip()]
|
| 111 |
|
| 112 |
-
|
|
|
|
| 113 |
self.rooms = [json.loads(line) for line in f if line.strip()]
|
| 114 |
|
| 115 |
-
print(f"Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
|
| 116 |
f"{len(self.materials)} materials, {len(self.rooms)} rooms")
|
| 117 |
|
| 118 |
def initialize_embeddings(self):
|
|
@@ -129,7 +159,7 @@ class DatabaseLoader:
|
|
| 129 |
material_texts = [m['material'] for m in self.materials]
|
| 130 |
self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
|
| 131 |
|
| 132 |
-
print("Embeddings ready!")
|
| 133 |
|
| 134 |
# Global DB instance
|
| 135 |
db = DatabaseLoader()
|
|
@@ -370,7 +400,8 @@ async def root():
|
|
| 370 |
"service": "Construction Scope Validator",
|
| 371 |
"version": "1.0.0",
|
| 372 |
"status": "running",
|
| 373 |
-
"data_loaded": len(db.stages) > 0
|
|
|
|
| 374 |
}
|
| 375 |
|
| 376 |
@app.get("/health")
|
|
@@ -381,7 +412,8 @@ async def health():
|
|
| 381 |
"tasks_loaded": len(db.tasks),
|
| 382 |
"materials_loaded": len(db.materials),
|
| 383 |
"rooms_loaded": len(db.rooms),
|
| 384 |
-
"embeddings_ready": db.stage_embeddings is not None
|
|
|
|
| 385 |
}
|
| 386 |
|
| 387 |
@app.post("/validate", response_model=ValidatedResponse)
|
|
@@ -435,8 +467,16 @@ async def match_room(room_name: str):
|
|
| 435 |
async def startup_event():
|
| 436 |
"""Load data and initialize embeddings on startup"""
|
| 437 |
try:
|
| 438 |
-
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
db.load_data(
|
| 441 |
stages_file='stages.json',
|
| 442 |
tasks_file='tasks.json',
|
|
@@ -444,10 +484,15 @@ async def startup_event():
|
|
| 444 |
rooms_file='rooms.json'
|
| 445 |
)
|
| 446 |
db.initialize_embeddings()
|
| 447 |
-
|
|
|
|
|
|
|
|
|
|
| 448 |
except Exception as e:
|
| 449 |
-
print(f"β
|
| 450 |
print("Make sure JSON files are in the correct location")
|
|
|
|
|
|
|
| 451 |
|
| 452 |
if __name__ == "__main__":
|
| 453 |
import uvicorn
|
|
|
|
| 9 |
from typing import List, Optional, Dict, Any
|
| 10 |
import json
|
| 11 |
import numpy as np
|
| 12 |
+
import os
|
| 13 |
from sentence_transformers import SentenceTransformer
|
| 14 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 15 |
import re
|
|
|
|
| 30 |
)
|
| 31 |
|
| 32 |
# Load embedding model (cached globally)
#
# Runs once at import time and binds the module-level name ``embedding_model``.
# If all of the expected model files are present in the current working
# directory, the model is loaded from './'; otherwise the stock
# 'all-MiniLM-L6-v2' model is used. Any exception raised while loading falls
# back to the stock model as well. Everything is loaded on CPU.
print("="*60)
print("LOADING MODEL...")
print("="*60)

try:
    # Check if trained model files exist in root
    model_files = ['config.json', 'pytorch_model.bin', 'sentence_bert_config.json']
    has_model = all(os.path.exists(f) for f in model_files)

    if has_model:
        print("✓ Trained model files found in root directory")
        print("Loading trained model...")
        embedding_model = SentenceTransformer('./', device='cpu')
        print("✅ Trained model loaded successfully!")
    else:
        print("⚠️ Trained model not found, using base model...")
        embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
        print("✅ Base model loaded successfully!")
except Exception as e:
    # Best-effort fallback: a failed load must not stop the module from
    # importing. If this second load fails too, the exception propagates.
    print(f"❌ Error loading trained model: {e}")
    print("Falling back to base model...")
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
    print("✅ Base model loaded successfully!")

print("="*60)
|
| 59 |
|
| 60 |
# ============= DATA MODELS =============
|
| 61 |
|
|
|
|
| 126 |
|
| 127 |
def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
    """Load the four datasets from JSON Lines files.

    Each file is opened as UTF-8 text and parsed one JSON document per line;
    empty and whitespace-only lines are skipped. The parsed records are
    stored on ``self.stages``, ``self.tasks``, ``self.materials`` and
    ``self.rooms``, in that order, and progress is printed along the way.

    Args:
        stages_file: Path of the file whose records go to ``self.stages``.
        tasks_file: Path of the file whose records go to ``self.tasks``.
        materials_file: Path of the file whose records go to ``self.materials``.
        rooms_file: Path of the file whose records go to ``self.rooms``.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    def _read_records(path):
        # One shared reader keeps the four loads identical (same encoding,
        # same blank-line handling) instead of four copy-pasted stanzas.
        print(f"Loading {path}...")
        with open(path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    self.stages = _read_records(stages_file)
    self.tasks = _read_records(tasks_file)
    self.materials = _read_records(materials_file)
    self.rooms = _read_records(rooms_file)

    print(f"✅ Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
          f"{len(self.materials)} materials, {len(self.rooms)} rooms")
|
| 147 |
|
| 148 |
def initialize_embeddings(self):
|
|
|
|
| 159 |
material_texts = [m['material'] for m in self.materials]
|
| 160 |
self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
|
| 161 |
|
| 162 |
+
print("✅ Embeddings ready!")
|
| 163 |
|
| 164 |
# Global DB instance
|
| 165 |
db = DatabaseLoader()
|
|
|
|
| 400 |
"service": "Construction Scope Validator",
|
| 401 |
"version": "1.0.0",
|
| 402 |
"status": "running",
|
| 403 |
+
"data_loaded": len(db.stages) > 0,
|
| 404 |
+
"model_type": "trained" if os.path.exists('pytorch_model.bin') else "base"
|
| 405 |
}
|
| 406 |
|
| 407 |
@app.get("/health")
|
|
|
|
| 412 |
"tasks_loaded": len(db.tasks),
|
| 413 |
"materials_loaded": len(db.materials),
|
| 414 |
"rooms_loaded": len(db.rooms),
|
| 415 |
+
"embeddings_ready": db.stage_embeddings is not None,
|
| 416 |
+
"model_type": "trained" if os.path.exists('pytorch_model.bin') else "base"
|
| 417 |
}
|
| 418 |
|
| 419 |
@app.post("/validate", response_model=ValidatedResponse)
|
|
|
|
| 467 |
async def startup_event():
|
| 468 |
"""Load data and initialize embeddings on startup"""
|
| 469 |
try:
|
| 470 |
+
print("\n" + "="*60)
|
| 471 |
+
print("STARTING UP...")
|
| 472 |
+
print("="*60)
|
| 473 |
+
|
| 474 |
+
# Check what files are available
|
| 475 |
+
print("\nFiles in root directory:")
|
| 476 |
+
for file in os.listdir('.'):
|
| 477 |
+
print(f" - {file}")
|
| 478 |
+
|
| 479 |
+
# Load data
|
| 480 |
db.load_data(
|
| 481 |
stages_file='stages.json',
|
| 482 |
tasks_file='tasks.json',
|
|
|
|
| 484 |
rooms_file='rooms.json'
|
| 485 |
)
|
| 486 |
db.initialize_embeddings()
|
| 487 |
+
|
| 488 |
+
print("\n" + "="*60)
|
| 489 |
+
print("✅ SERVICE READY!")
|
| 490 |
+
print("="*60)
|
| 491 |
except Exception as e:
|
| 492 |
+
print(f"\n❌ STARTUP ERROR: {e}")
|
| 493 |
print("Make sure JSON files are in the correct location")
|
| 494 |
+
import traceback
|
| 495 |
+
traceback.print_exc()
|
| 496 |
|
| 497 |
if __name__ == "__main__":
|
| 498 |
import uvicorn
|