aniketkumar1106 committed on
Commit
7db5fb6
·
verified ·
1 Parent(s): fd44eb0

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +39 -26
server.py CHANGED
@@ -3,6 +3,7 @@ import shutil
3
  import zipfile
4
  import logging
5
  import urllib.parse
 
6
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.staticfiles import StaticFiles
@@ -25,14 +26,25 @@ DATASET_REPO = "aniketkumar1106/orbit-data"
25
  IMAGE_DIR = "Productimages"
26
  DB_TARGET = "orbiitt.db"
27
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def setup_environment():
29
  token = os.environ.get("HF_TOKEN")
30
- # Clean old data to prevent version mismatch
31
  if os.path.exists(IMAGE_DIR): shutil.rmtree(IMAGE_DIR)
32
  os.makedirs(IMAGE_DIR)
33
 
34
  try:
35
- logger.info(f"Downloading dataset from {DATASET_REPO}...")
36
  snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
37
 
38
  if os.path.exists("orbiitt_db.zip"):
@@ -42,26 +54,19 @@ def setup_environment():
42
  for root, _, files in os.walk("temp_extract"):
43
  for f in files:
44
  src = os.path.join(root, f)
45
- # 1. DB LOCATOR: Handles nested or renamed DB files
46
  if f.lower() in ["orbiitt_db", "orbiitt.db"] or (f.endswith(".db") and not f.startswith(".")):
47
  shutil.copy(src, f"./{DB_TARGET}")
48
- # 2. IMAGE NORMALIZER: Fixes encoding and moves to static folder
49
  elif f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
50
- clean_name = urllib.parse.unquote(f)
 
51
  dest = os.path.join(IMAGE_DIR, clean_name)
52
  shutil.copy(src, dest)
53
  shutil.rmtree("temp_extract")
54
-
55
- # Root fallback for database
56
- if os.path.exists("orbiitt_db") and not os.path.exists(DB_TARGET):
57
- shutil.move("orbiitt_db", DB_TARGET)
58
-
59
- logger.info(f"READY: Images={len(os.listdir(IMAGE_DIR))} DB={os.path.exists(DB_TARGET)}")
60
  except Exception as e:
61
- logger.error(f"FATAL SETUP ERROR: {e}")
62
 
63
  setup_environment()
64
-
65
  app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimages")
66
 
67
  engine = None
@@ -69,18 +74,12 @@ try:
69
  from orbiitt_engine import OrbiittEngine
70
  if os.path.exists(DB_TARGET):
71
  engine = OrbiittEngine()
72
- logger.info("SIGLIP ENGINE ONLINE")
73
  except Exception as e:
74
- logger.error(f"ENGINE ERROR: {e}")
75
-
76
- @app.get("/")
77
- def health():
78
- imgs = os.listdir(IMAGE_DIR) if os.path.exists(IMAGE_DIR) else []
79
- return {"engine": engine is not None, "db": os.path.exists(DB_TARGET), "img_count": len(imgs)}
80
 
81
  @app.post("/search")
82
  async def search(text: str = Form(None), weight: float = Form(0.5), file: UploadFile = File(None)):
83
- if not engine: raise HTTPException(503, detail="AI Engine Loading")
84
  t_path = None
85
  try:
86
  if file:
@@ -89,15 +88,29 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
89
 
90
  results = engine.search(text_query=text, image_file=t_path, text_weight=weight)
91
 
92
- # 3. PATH SANITIZER: Converts internal paths to public browser URLs
 
 
93
  for r in results:
94
  raw_path = r.get('url') or r.get('path') or ""
95
- filename = os.path.basename(raw_path)
96
- r['url'] = f"Productimages/{urllib.parse.quote(filename)}"
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- return {"results": results}
99
  except Exception as e:
100
- return {"results": [], "error": str(e)}
 
101
  finally:
102
  if t_path and os.path.exists(t_path): os.remove(t_path)
103
 
 
3
  import zipfile
4
  import logging
5
  import urllib.parse
6
+ import unicodedata
7
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from fastapi.staticfiles import StaticFiles
 
26
  IMAGE_DIR = "Productimages"
27
  DB_TARGET = "orbiitt.db"
28
 
29
+ def normalize_filename(name):
30
+ """
31
+ Cleans filenames by removing smart quotes, normalizing unicode,
32
+ and stripping spaces to ensure the URL matches the file on disk.
33
+ """
34
+ # 1. Replace Unicode smart quotes/dashes with standard ASCII
35
+ name = name.replace('’', "'").replace('‘', "'").replace('“', '"').replace('”', '"').replace('–', '-')
36
+ # 2. Convert to ASCII (removes accents/unsupported symbols)
37
+ name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
38
+ # 3. Final cleanup
39
+ return name.strip()
40
+
41
  def setup_environment():
42
  token = os.environ.get("HF_TOKEN")
 
43
  if os.path.exists(IMAGE_DIR): shutil.rmtree(IMAGE_DIR)
44
  os.makedirs(IMAGE_DIR)
45
 
46
  try:
47
+ logger.info("Syncing Dataset from Hugging Face...")
48
  snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
49
 
50
  if os.path.exists("orbiitt_db.zip"):
 
54
  for root, _, files in os.walk("temp_extract"):
55
  for f in files:
56
  src = os.path.join(root, f)
 
57
  if f.lower() in ["orbiitt_db", "orbiitt.db"] or (f.endswith(".db") and not f.startswith(".")):
58
  shutil.copy(src, f"./{DB_TARGET}")
 
59
  elif f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
60
+ # Normalize the file name before saving it to disk
61
+ clean_name = normalize_filename(urllib.parse.unquote(f))
62
  dest = os.path.join(IMAGE_DIR, clean_name)
63
  shutil.copy(src, dest)
64
  shutil.rmtree("temp_extract")
65
+ logger.info(f"Environment Ready: {len(os.listdir(IMAGE_DIR))} images stored.")
 
 
 
 
 
66
  except Exception as e:
67
+ logger.error(f"Setup Error: {e}")
68
 
69
  setup_environment()
 
70
  app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimages")
71
 
72
  engine = None
 
74
  from orbiitt_engine import OrbiittEngine
75
  if os.path.exists(DB_TARGET):
76
  engine = OrbiittEngine()
 
77
  except Exception as e:
78
+ logger.error(f"AI Engine Failed to Start: {e}")
 
 
 
 
 
79
 
80
  @app.post("/search")
81
  async def search(text: str = Form(None), weight: float = Form(0.5), file: UploadFile = File(None)):
82
+ if not engine: raise HTTPException(503, detail="AI Engine Offline")
83
  t_path = None
84
  try:
85
  if file:
 
88
 
89
  results = engine.search(text_query=text, image_file=t_path, text_weight=weight)
90
 
91
+ all_files = os.listdir(IMAGE_DIR)
92
+ valid_results = []
93
+
94
  for r in results:
95
  raw_path = r.get('url') or r.get('path') or ""
96
+ # Normalize the database name to match the cleaned names on disk
97
+ fname = normalize_filename(os.path.basename(raw_path))
98
+
99
+ if fname in all_files:
100
+ r['url'] = f"Productimages/{urllib.parse.quote(fname)}"
101
+ valid_results.append(r)
102
+ else:
103
+ # Fallback: check for substring matches (handles truncation errors)
104
+ for disk_file in all_files:
105
+ if fname[:30] in disk_file: # Compare first 30 chars
106
+ r['url'] = f"Productimages/{urllib.parse.quote(disk_file)}"
107
+ valid_results.append(r)
108
+ break
109
 
110
+ return {"results": valid_results}
111
  except Exception as e:
112
+ logger.error(f"Search Process Error: {e}")
113
+ return {"results": []}
114
  finally:
115
  if t_path and os.path.exists(t_path): os.remove(t_path)
116