aniketkumar1106 committed on
Commit
03e77d7
·
verified ·
1 Parent(s): 68e27bd

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +62 -18
server.py CHANGED
@@ -16,7 +16,7 @@ from huggingface_hub import snapshot_download
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
 
19
- DATASET_REPO = "aniketkumar1106/orbit-data"
20
  IMAGE_DIR = "Productimages"
21
  DB_TARGET_FOLDER = "orbiitt_db" # The folder ChromaDB expects
22
  MIN_CONFIDENCE_THRESHOLD = 0.1
@@ -49,7 +49,8 @@ def background_sync():
49
  global engine, loading_status
50
  token = os.environ.get("HF_TOKEN")
51
 
52
- # Cleanup old images
 
53
  for f in os.listdir(IMAGE_DIR):
54
  p = os.path.join(IMAGE_DIR, f)
55
  try:
@@ -59,10 +60,44 @@ def background_sync():
59
 
60
  try:
61
  loading_status = "Syncing Assets..."
 
 
 
62
  snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  if os.path.exists("orbiitt_db.zip"):
65
  loading_status = "Extracting Database..."
 
66
  with zipfile.ZipFile("orbiitt_db.zip", 'r') as z:
67
  z.extractall("temp_extract")
68
 
@@ -76,20 +111,28 @@ def background_sync():
76
  # Move the directory containing the sqlite3 file to our target location
77
  shutil.move(root, DB_TARGET_FOLDER)
78
  db_found = True
79
-
80
- # Move images
81
- for f in files:
82
- if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
83
- src = os.path.join(root, f)
84
- clean_name = normalize_filename(f)
85
- shutil.copy(src, os.path.join(IMAGE_DIR, clean_name))
86
-
87
  shutil.rmtree("temp_extract")
88
 
89
- # If no DB folder was moved (maybe zip structure was flat?), ensure folder exists
90
  if not db_found and not os.path.exists(DB_TARGET_FOLDER):
91
  os.makedirs(DB_TARGET_FOLDER, exist_ok=True)
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # LOGGING FILE COUNT FOR VALIDATION
94
  final_count = len(os.listdir(IMAGE_DIR))
95
  logger.info(f"DISK VALIDATION: {final_count} images ready in {IMAGE_DIR}")
@@ -120,7 +163,9 @@ app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimage
120
  # Serve UI (Root Endpoint)
121
  @app.get("/")
122
  async def read_index():
123
- return FileResponse('index.html')
 
 
124
 
125
  @app.get("/health")
126
  def health():
@@ -144,13 +189,12 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
144
  async with await anyio.open_file(t_path, "wb") as f:
145
  await f.write(content)
146
 
147
- # CORRECTED: Calling engine with top_k
148
  results = await anyio.to_thread.run_sync(
149
  lambda: engine.search(
150
  text_query=text,
151
  image_file=t_path,
152
- text_weight=actual_weight,
153
- #top_k=50
154
  )
155
  )
156
 
@@ -161,7 +205,7 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
161
  for r in results:
162
  score = r.get('score', 0)
163
  pid = r.get('id', 'Product')
164
-
165
  if score < MIN_CONFIDENCE_THRESHOLD or pid in seen_ids:
166
  continue
167
 
@@ -201,5 +245,5 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
201
 
202
  if __name__ == "__main__":
203
  import uvicorn
204
- uvicorn.run(app, host="0.0.0.0", port=7860)
205
-
 
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
 
19
+ DATASET_REPO = "aniketkumar1106/orbit-data"
20
  IMAGE_DIR = "Productimages"
21
  DB_TARGET_FOLDER = "orbiitt_db" # The folder ChromaDB expects
22
  MIN_CONFIDENCE_THRESHOLD = 0.1
 
49
  global engine, loading_status
50
  token = os.environ.get("HF_TOKEN")
51
 
52
+ # Cleanup old images to prevent duplicates or stale data
53
+ logger.info("Cleaning up old image directory...")
54
  for f in os.listdir(IMAGE_DIR):
55
  p = os.path.join(IMAGE_DIR, f)
56
  try:
 
60
 
61
  try:
62
  loading_status = "Syncing Assets..."
63
+ logger.info(f"Downloading dataset from {DATASET_REPO}...")
64
+
65
+ # Download everything (including the new zip) to current directory
66
  snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
67
 
68
+ # --- STEP A: HANDLE IMAGE ZIP (Productimages.zip) ---
69
+ if os.path.exists("Productimages.zip"):
70
+ loading_status = "Extracting Images..."
71
+ logger.info("Found Productimages.zip! Extracting...")
72
+
73
+ # Extract to a temp folder first
74
+ with zipfile.ZipFile("Productimages.zip", 'r') as z:
75
+ z.extractall("temp_images_zip")
76
+
77
+ # Move images from temp zip extract to the main IMAGE_DIR
78
+ count_zip_images = 0
79
+ for root, dirs, files in os.walk("temp_images_zip"):
80
+ for f in files:
81
+ # Ignore hidden files (like __MACOSX)
82
+ if f.startswith('.'): continue
83
+
84
+ if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
85
+ src = os.path.join(root, f)
86
+ clean_name = normalize_filename(f)
87
+ dst = os.path.join(IMAGE_DIR, clean_name)
88
+ try:
89
+ # Move and overwrite if necessary
90
+ shutil.move(src, dst)
91
+ count_zip_images += 1
92
+ except: pass
93
+
94
+ shutil.rmtree("temp_images_zip")
95
+ logger.info(f"Extracted {count_zip_images} images from Productimages.zip")
96
+
97
+ # --- STEP B: HANDLE DATABASE ZIP (orbiitt_db.zip) ---
98
  if os.path.exists("orbiitt_db.zip"):
99
  loading_status = "Extracting Database..."
100
+ logger.info("Extracting orbiitt_db.zip...")
101
  with zipfile.ZipFile("orbiitt_db.zip", 'r') as z:
102
  z.extractall("temp_extract")
103
 
 
111
  # Move the directory containing the sqlite3 file to our target location
112
  shutil.move(root, DB_TARGET_FOLDER)
113
  db_found = True
114
+
 
 
 
 
 
 
 
115
  shutil.rmtree("temp_extract")
116
 
 
117
  if not db_found and not os.path.exists(DB_TARGET_FOLDER):
118
  os.makedirs(DB_TARGET_FOLDER, exist_ok=True)
119
 
120
+ # --- STEP C: CATCH ANY LEFTOVER LOOSE IMAGES ---
121
+ # If any images were downloaded loose (not in zip), move them too
122
+ for root, dirs, files in os.walk("."):
123
+ if IMAGE_DIR in root or ".git" in root or DB_TARGET_FOLDER in root:
124
+ continue
125
+ for f in files:
126
+ if f.startswith('.'): continue
127
+
128
+ if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
129
+ src = os.path.join(root, f)
130
+ clean_name = normalize_filename(f)
131
+ dst = os.path.join(IMAGE_DIR, clean_name)
132
+ if not os.path.exists(dst):
133
+ try: shutil.move(src, dst)
134
+ except: pass
135
+
136
  # LOGGING FILE COUNT FOR VALIDATION
137
  final_count = len(os.listdir(IMAGE_DIR))
138
  logger.info(f"DISK VALIDATION: {final_count} images ready in {IMAGE_DIR}")
 
163
  # Serve UI (Root Endpoint)
164
  @app.get("/")
165
  async def read_index():
166
+ if os.path.exists('index.html'):
167
+ return FileResponse('index.html')
168
+ return {"status": "Online", "message": "index.html not found, but server is running."}
169
 
170
  @app.get("/health")
171
  def health():
 
189
  async with await anyio.open_file(t_path, "wb") as f:
190
  await f.write(content)
191
 
192
+ # CORRECTED: Calling engine WITHOUT top_k
193
  results = await anyio.to_thread.run_sync(
194
  lambda: engine.search(
195
  text_query=text,
196
  image_file=t_path,
197
+ text_weight=actual_weight
 
198
  )
199
  )
200
 
 
205
  for r in results:
206
  score = r.get('score', 0)
207
  pid = r.get('id', 'Product')
208
+
209
  if score < MIN_CONFIDENCE_THRESHOLD or pid in seen_ids:
210
  continue
211
 
 
245
 
246
  if __name__ == "__main__":
247
  import uvicorn
248
+ # FIXED: Listen on all interfaces (0.0.0.0) and use the correct port, 7860
249
+ uvicorn.run(app, host="0.0.0.0", port=7860)