Spaces:

aniketkumar1106
/

orbittv2

Runtime error

App Files Files Community

aniketkumar1106 committed on Dec 29, 2025

Commit

03e77d7

verified ·

1 Parent(s): 68e27bd

Update server.py

Browse files

Files changed (1) hide show

server.py +62 -18

server.py CHANGED Viewed

@@ -16,7 +16,7 @@ from huggingface_hub import snapshot_download
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-DATASET_REPO = "aniketkumar1106/orbit-data"
 IMAGE_DIR = "Productimages"
 DB_TARGET_FOLDER = "orbiitt_db" # The folder ChromaDB expects
 MIN_CONFIDENCE_THRESHOLD = 0.1
@@ -49,7 +49,8 @@ def background_sync():
     global engine, loading_status
     token = os.environ.get("HF_TOKEN")
-    # Cleanup old images
     for f in os.listdir(IMAGE_DIR):
         p = os.path.join(IMAGE_DIR, f)
         try:
@@ -59,10 +60,44 @@ def background_sync():
     try:
         loading_status = "Syncing Assets..."
         snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
         if os.path.exists("orbiitt_db.zip"):
             loading_status = "Extracting Database..."
             with zipfile.ZipFile("orbiitt_db.zip", 'r') as z:
                 z.extractall("temp_extract")
@@ -76,20 +111,28 @@ def background_sync():
                     # Move the directory containing the sqlite3 file to our target location
                     shutil.move(root, DB_TARGET_FOLDER)
                     db_found = True
-                # Move images
-                for f in files:
-                    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
-                        src = os.path.join(root, f)
-                        clean_name = normalize_filename(f)
-                        shutil.copy(src, os.path.join(IMAGE_DIR, clean_name))
             shutil.rmtree("temp_extract")
-            # If no DB folder was moved (maybe zip structure was flat?), ensure folder exists
             if not db_found and not os.path.exists(DB_TARGET_FOLDER):
                 os.makedirs(DB_TARGET_FOLDER, exist_ok=True)
         # LOGGING FILE COUNT FOR VALIDATION
         final_count = len(os.listdir(IMAGE_DIR))
         logger.info(f"DISK VALIDATION: {final_count} images ready in {IMAGE_DIR}")
@@ -120,7 +163,9 @@ app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimage
 # Serve UI (Root Endpoint)
 @app.get("/")
 async def read_index():
-    return FileResponse('index.html')
 @app.get("/health")
 def health():
@@ -144,13 +189,12 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
             async with await anyio.open_file(t_path, "wb") as f:
                 await f.write(content)
-        # CORRECTED: Calling engine with top_k
         results = await anyio.to_thread.run_sync(
             lambda: engine.search(
                 text_query=text,
                 image_file=t_path,
-                text_weight=actual_weight,
-                #top_k=50
             )
         )
@@ -161,7 +205,7 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
         for r in results:
             score = r.get('score', 0)
             pid = r.get('id', 'Product')
             if score < MIN_CONFIDENCE_THRESHOLD or pid in seen_ids:
                 continue
@@ -201,5 +245,5 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+DATASET_REPO = "aniketkumar1106/orbit-data"
 IMAGE_DIR = "Productimages"
 DB_TARGET_FOLDER = "orbiitt_db" # The folder ChromaDB expects
 MIN_CONFIDENCE_THRESHOLD = 0.1
     global engine, loading_status
     token = os.environ.get("HF_TOKEN")
+    # Cleanup old images to prevent duplicates or stale data
+    logger.info("Cleaning up old image directory...")
     for f in os.listdir(IMAGE_DIR):
         p = os.path.join(IMAGE_DIR, f)
         try:
     try:
         loading_status = "Syncing Assets..."
+        logger.info(f"Downloading dataset from {DATASET_REPO}...")
+        # Download everything (including the new zip) to current directory
         snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
+        # --- STEP A: HANDLE IMAGE ZIP (Productimages.zip) ---
+        if os.path.exists("Productimages.zip"):
+            loading_status = "Extracting Images..."
+            logger.info("Found Productimages.zip! Extracting...")
+            # Extract to a temp folder first
+            with zipfile.ZipFile("Productimages.zip", 'r') as z:
+                z.extractall("temp_images_zip")
+            # Move images from temp zip extract to the main IMAGE_DIR
+            count_zip_images = 0
+            for root, dirs, files in os.walk("temp_images_zip"):
+                for f in files:
+                    # Ignore hidden dot-files (e.g. the "._*" entries macOS adds under __MACOSX)
+                    if f.startswith('.'): continue
+                    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
+                        src = os.path.join(root, f)
+                        clean_name = normalize_filename(f)
+                        dst = os.path.join(IMAGE_DIR, clean_name)
+                        try:
+                            # Move and overwrite if necessary
+                            shutil.move(src, dst)
+                            count_zip_images += 1
+                        except: pass
+            shutil.rmtree("temp_images_zip")
+            logger.info(f"Extracted {count_zip_images} images from Productimages.zip")
+        # --- STEP B: HANDLE DATABASE ZIP (orbiitt_db.zip) ---
         if os.path.exists("orbiitt_db.zip"):
             loading_status = "Extracting Database..."
+            logger.info("Extracting orbiitt_db.zip...")
             with zipfile.ZipFile("orbiitt_db.zip", 'r') as z:
                 z.extractall("temp_extract")
                     # Move the directory containing the sqlite3 file to our target location
                     shutil.move(root, DB_TARGET_FOLDER)
                     db_found = True
             shutil.rmtree("temp_extract")
             if not db_found and not os.path.exists(DB_TARGET_FOLDER):
                 os.makedirs(DB_TARGET_FOLDER, exist_ok=True)
+        # --- STEP C: CATCH ANY LEFTOVER LOOSE IMAGES ---
+        # If any images were downloaded loose (not in zip), move them too
+        for root, dirs, files in os.walk("."):
+            if IMAGE_DIR in root or ".git" in root or DB_TARGET_FOLDER in root:
+                continue
+            for f in files:
+                if f.startswith('.'): continue
+                if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
+                    src = os.path.join(root, f)
+                    clean_name = normalize_filename(f)
+                    dst = os.path.join(IMAGE_DIR, clean_name)
+                    if not os.path.exists(dst):
+                        try: shutil.move(src, dst)
+                        except: pass
         # LOGGING FILE COUNT FOR VALIDATION
         final_count = len(os.listdir(IMAGE_DIR))
         logger.info(f"DISK VALIDATION: {final_count} images ready in {IMAGE_DIR}")
 # Serve UI (Root Endpoint)
 @app.get("/")
 async def read_index():
+    if os.path.exists('index.html'):
+        return FileResponse('index.html')
+    return {"status": "Online", "message": "index.html not found, but server is running."}
 @app.get("/health")
 def health():
             async with await anyio.open_file(t_path, "wb") as f:
                 await f.write(content)
+        # CORRECTED: Calling engine WITHOUT top_k
         results = await anyio.to_thread.run_sync(
             lambda: engine.search(
                 text_query=text,
                 image_file=t_path,
+                text_weight=actual_weight
             )
         )
         for r in results:
             score = r.get('score', 0)
             pid = r.get('id', 'Product')
             if score < MIN_CONFIDENCE_THRESHOLD or pid in seen_ids:
                 continue
 if __name__ == "__main__":
     import uvicorn
+    # FIXED: Listen on all interfaces (0.0.0.0) and correct port 7860
+    uvicorn.run(app, host="0.0.0.0", port=7860)