Spaces:

mlbench123
/

Sudoco_ScopeOfWork

Sleeping

App Files Community

mlbench123 committed on Jan 2

Commit

a417a57

verified ·

1 Parent(s): 1a225c5

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -13

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
 from typing import List, Optional, Dict, Any
 import json
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import re
@@ -29,7 +30,32 @@ app.add_middleware(
 )
 # Load embedding model (cached globally)
-embedding_model = SentenceTransformer('./')
 # ============= DATA MODELS =============
@@ -100,19 +126,23 @@ class DatabaseLoader:
     def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
         """Load JSON data files"""
-        with open(stages_file, 'r') as f:
             self.stages = [json.loads(line) for line in f if line.strip()]
-        with open(tasks_file, 'r') as f:
             self.tasks = [json.loads(line) for line in f if line.strip()]
-        with open(materials_file, 'r') as f:
             self.materials = [json.loads(line) for line in f if line.strip()]
-        with open(rooms_file, 'r') as f:
             self.rooms = [json.loads(line) for line in f if line.strip()]
-        print(f"Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
               f"{len(self.materials)} materials, {len(self.rooms)} rooms")
     def initialize_embeddings(self):
@@ -129,7 +159,7 @@ class DatabaseLoader:
         material_texts = [m['material'] for m in self.materials]
         self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
-        print("Embeddings ready!")
 # Global DB instance
 db = DatabaseLoader()
@@ -370,7 +400,8 @@ async def root():
         "service": "Construction Scope Validator",
         "version": "1.0.0",
         "status": "running",
-        "data_loaded": len(db.stages) > 0
     }
 @app.get("/health")
@@ -381,7 +412,8 @@ async def health():
         "tasks_loaded": len(db.tasks),
         "materials_loaded": len(db.materials),
         "rooms_loaded": len(db.rooms),
-        "embeddings_ready": db.stage_embeddings is not None
     }
 @app.post("/validate", response_model=ValidatedResponse)
@@ -435,8 +467,16 @@ async def match_room(room_name: str):
 async def startup_event():
     """Load data and initialize embeddings on startup"""
     try:
-        # In production, load from mounted volumes or environment
-        # For Hugging Face Spaces, put JSON files in the repo root
         db.load_data(
             stages_file='stages.json',
             tasks_file='tasks.json',
@@ -444,10 +484,15 @@ async def startup_event():
             rooms_file='rooms.json'
         )
         db.initialize_embeddings()
-        print("✅ Service ready!")
     except Exception as e:
-        print(f"❌ Startup error: {e}")
         print("Make sure JSON files are in the correct location")
 if __name__ == "__main__":
     import uvicorn

 from typing import List, Optional, Dict, Any
 import json
 import numpy as np
+import os
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import re
 )
 # Load embedding model (cached globally)
+# Try to load trained model from root, fallback to base model
+print("="*60)
+print("LOADING MODEL...")
+print("="*60)
+try:
+    # Check if trained model files exist in root
+    model_files = ['config.json', 'pytorch_model.bin', 'sentence_bert_config.json']
+    has_model = all(os.path.exists(f) for f in model_files)
+    if has_model:
+        print("✓ Trained model files found in root directory")
+        print("Loading trained model...")
+        embedding_model = SentenceTransformer('./', device='cpu')
+        print("✅ Trained model loaded successfully!")
+    else:
+        print("⚠️ Trained model not found, using base model...")
+        embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+        print("✅ Base model loaded successfully!")
+except Exception as e:
+    print(f"❌ Error loading trained model: {e}")
+    print("Falling back to base model...")
+    embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+    print("✅ Base model loaded successfully!")
+print("="*60)
 # ============= DATA MODELS =============
     def load_data(self, stages_file: str, tasks_file: str, materials_file: str, rooms_file: str):
         """Load JSON data files"""
+        print(f"Loading {stages_file}...")
+        with open(stages_file, 'r', encoding='utf-8') as f:
             self.stages = [json.loads(line) for line in f if line.strip()]
+        print(f"Loading {tasks_file}...")
+        with open(tasks_file, 'r', encoding='utf-8') as f:
             self.tasks = [json.loads(line) for line in f if line.strip()]
+        print(f"Loading {materials_file}...")
+        with open(materials_file, 'r', encoding='utf-8') as f:
             self.materials = [json.loads(line) for line in f if line.strip()]
+        print(f"Loading {rooms_file}...")
+        with open(rooms_file, 'r', encoding='utf-8') as f:
             self.rooms = [json.loads(line) for line in f if line.strip()]
+        print(f"✅ Loaded: {len(self.stages)} stages, {len(self.tasks)} tasks, "
               f"{len(self.materials)} materials, {len(self.rooms)} rooms")
     def initialize_embeddings(self):
         material_texts = [m['material'] for m in self.materials]
         self.material_embeddings = embedding_model.encode(material_texts, show_progress_bar=True)
+        print("✅ Embeddings ready!")
 # Global DB instance
 db = DatabaseLoader()
         "service": "Construction Scope Validator",
         "version": "1.0.0",
         "status": "running",
+        "data_loaded": len(db.stages) > 0,
+        "model_type": "trained" if os.path.exists('pytorch_model.bin') else "base"
     }
 @app.get("/health")
         "tasks_loaded": len(db.tasks),
         "materials_loaded": len(db.materials),
         "rooms_loaded": len(db.rooms),
+        "embeddings_ready": db.stage_embeddings is not None,
+        "model_type": "trained" if os.path.exists('pytorch_model.bin') else "base"
     }
 @app.post("/validate", response_model=ValidatedResponse)
 async def startup_event():
     """Load data and initialize embeddings on startup"""
     try:
+        print("\n" + "="*60)
+        print("STARTING UP...")
+        print("="*60)
+        # Check what files are available
+        print("\nFiles in root directory:")
+        for file in os.listdir('.'):
+            print(f"  - {file}")
+        # Load data
         db.load_data(
             stages_file='stages.json',
             tasks_file='tasks.json',
             rooms_file='rooms.json'
         )
         db.initialize_embeddings()
+        print("\n" + "="*60)
+        print("✅ SERVICE READY!")
+        print("="*60)
     except Exception as e:
+        print(f"\n❌ STARTUP ERROR: {e}")
         print("Make sure JSON files are in the correct location")
+        import traceback
+        traceback.print_exc()
 if __name__ == "__main__":
     import uvicorn