aniketkumar1106 committed on
Commit
51115b7
·
verified ·
1 Parent(s): 1c253ff

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +46 -24
server.py CHANGED
@@ -9,21 +9,25 @@ from fastapi.middleware.cors import CORSMiddleware
9
  from fastapi.staticfiles import StaticFiles
10
  from huggingface_hub import snapshot_download
11
 
12
- # 1. SETUP LOGGING
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
- # 2. DEFINE PATHS
17
  DATASET_REPO = "aniketkumar1106/orbit-data"
18
  IMAGE_DIR = "Productimages"
19
  DB_TARGET = "orbiitt.db"
20
 
21
- # 3. CRITICAL FIX: Create directory BEFORE FastAPI initialization
 
 
 
 
 
 
22
  if not os.path.exists(IMAGE_DIR):
23
  os.makedirs(IMAGE_DIR, exist_ok=True)
24
- logger.info(f"Created initial directory: {IMAGE_DIR}")
25
 
26
- # 4. INITIALIZE APP
27
  app = FastAPI()
28
 
29
  app.add_middleware(
@@ -34,16 +38,20 @@ app.add_middleware(
34
  allow_headers=["*"],
35
  )
36
 
37
- # 5. MOUNT STORAGE (Will no longer crash)
38
  app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimages")
39
 
40
- # 6. GLOBAL ENGINE STATE
41
  engine = None
42
- loading_status = "System Booting..."
43
 
44
  def normalize_filename(name):
45
- """Handles smart quotes and complex symbols to prevent 404s."""
 
 
 
46
  name = name.replace('’', "'").replace('‘', "'").replace('“', '"').replace('”', '"').replace('–', '-')
 
47
  name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
48
  return name.strip()
49
 
@@ -52,13 +60,13 @@ def setup_environment():
52
  global engine, loading_status
53
  token = os.environ.get("HF_TOKEN")
54
 
55
- # We clean and re-create inside startup to ensure a fresh sync
56
  if os.path.exists(IMAGE_DIR):
57
  shutil.rmtree(IMAGE_DIR)
58
  os.makedirs(IMAGE_DIR, exist_ok=True)
59
 
60
  try:
61
- loading_status = "Syncing Orbit Assets..."
62
  logger.info(loading_status)
63
  snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
64
 
@@ -72,13 +80,12 @@ def setup_environment():
72
  if f.lower() in ["orbiitt_db", "orbiitt.db"] or (f.endswith(".db") and not f.startswith(".")):
73
  shutil.copy(src, f"./{DB_TARGET}")
74
  elif f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
75
- # Normalize filename on disk
76
- clean_name = normalize_filename(urllib.parse.unquote(f))
77
  shutil.copy(src, os.path.join(IMAGE_DIR, clean_name))
78
  shutil.rmtree("temp_extract")
79
 
80
- loading_status = "Waking up AI Engine..."
81
- logger.info(loading_status)
82
  try:
83
  from orbiitt_engine import OrbiittEngine
84
  engine = OrbiittEngine()
@@ -108,30 +115,45 @@ async def search(text: str = Form(None), weight: float = Form(0.5), file: Upload
108
  with open(t_path, "wb") as b:
109
  b.write(await file.read())
110
 
111
- results = engine.search(text_query=text, image_file=t_path, text_weight=weight)
 
112
 
113
  all_files = os.listdir(IMAGE_DIR)
114
  valid_results = []
115
 
116
  for r in results:
 
 
 
 
 
 
 
117
  raw_path = r.get('url') or r.get('path') or ""
118
- fname = normalize_filename(os.path.basename(raw_path))
119
 
120
- # Fuzzy match check
121
  match = None
122
- if fname in all_files:
123
- match = fname
124
  else:
 
125
  for disk_file in all_files:
126
- if fname[:20] in disk_file:
127
  match = disk_file
128
  break
129
 
130
  if match:
131
- r['url'] = f"Productimages/{urllib.parse.quote(match)}"
132
- valid_results.append(r)
 
 
 
 
 
 
133
 
134
- return {"results": valid_results}
135
  except Exception as e:
136
  logger.error(f"Search error: {e}")
137
  return {"results": [], "error": str(e)}
 
9
  from fastapi.staticfiles import StaticFiles
10
  from huggingface_hub import snapshot_download
11
 
12
+ # 1. LOGGING & PATHS
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
 
16
  DATASET_REPO = "aniketkumar1106/orbit-data"
17
  IMAGE_DIR = "Productimages"
18
  DB_TARGET = "orbiitt.db"
19
 
20
+ # 2. ACCURACY CONFIGURATION
21
+ # Scores below this are considered "random noise" and discarded.
22
+ # 0.22 is the standard for SigLIP 2 to separate categories effectively.
23
+ MIN_CONFIDENCE_THRESHOLD = 0.22
24
+
25
+ # 3. DIRECTORY BOOTSTRAP
26
+ # Fixes "RuntimeError: Directory 'Productimages' does not exist"
27
  if not os.path.exists(IMAGE_DIR):
28
  os.makedirs(IMAGE_DIR, exist_ok=True)
 
29
 
30
+ # 4. APP INITIALIZATION
31
  app = FastAPI()
32
 
33
  app.add_middleware(
 
38
  allow_headers=["*"],
39
  )
40
 
41
+ # Mount static files (guaranteed to find folder now)
42
  app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimages")
43
 
44
+ # 5. GLOBAL ENGINE STATE
45
  engine = None
46
+ loading_status = "System Initializing..."
47
 
48
  def normalize_filename(name):
49
+ """Deep normalization to ensure AI path strings match local disk files."""
50
+ if not name: return ""
51
+ name = urllib.parse.unquote(name)
52
+ # Convert smart quotes/dashes to standard ASCII
53
  name = name.replace('’', "'").replace('‘', "'").replace('“', '"').replace('”', '"').replace('–', '-')
54
+ # Strip non-ASCII/Unicode decomposition
55
  name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
56
  return name.strip()
57
 
 
60
  global engine, loading_status
61
  token = os.environ.get("HF_TOKEN")
62
 
63
+ # Clean and re-sync on boot
64
  if os.path.exists(IMAGE_DIR):
65
  shutil.rmtree(IMAGE_DIR)
66
  os.makedirs(IMAGE_DIR, exist_ok=True)
67
 
68
  try:
69
+ loading_status = "Downloading Orbit Assets..."
70
  logger.info(loading_status)
71
  snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")
72
 
 
80
  if f.lower() in ["orbiitt_db", "orbiitt.db"] or (f.endswith(".db") and not f.startswith(".")):
81
  shutil.copy(src, f"./{DB_TARGET}")
82
  elif f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
83
+ # Normalize BEFORE saving to disk
84
+ clean_name = normalize_filename(f)
85
  shutil.copy(src, os.path.join(IMAGE_DIR, clean_name))
86
  shutil.rmtree("temp_extract")
87
 
88
+ loading_status = "Loading SigLIP 2 Engine..."
 
89
  try:
90
  from orbiitt_engine import OrbiittEngine
91
  engine = OrbiittEngine()
 
115
  with open(t_path, "wb") as b:
116
  b.write(await file.read())
117
 
118
+ # Increase top_k to 40 so we have a larger pool to filter from
119
+ results = engine.search(text_query=text, image_file=t_path, text_weight=weight, top_k=40)
120
 
121
  all_files = os.listdir(IMAGE_DIR)
122
  valid_results = []
123
 
124
  for r in results:
125
+ score = r.get('score', 0)
126
+
127
+ # ACCURACY FIX: The Confidence Gate
128
+ # Prevents random cross-category matching
129
+ if score < MIN_CONFIDENCE_THRESHOLD:
130
+ continue
131
+
132
  raw_path = r.get('url') or r.get('path') or ""
133
+ fname_from_ai = normalize_filename(os.path.basename(raw_path))
134
 
135
+ # Precise Matching Logic
136
  match = None
137
+ if fname_from_ai in all_files:
138
+ match = fname_from_ai
139
  else:
140
+ # Substring fallback for robustness
141
  for disk_file in all_files:
142
+ if fname_from_ai[:20].lower() in disk_file.lower():
143
  match = disk_file
144
  break
145
 
146
  if match:
147
+ valid_results.append({
148
+ "id": r.get('id', 'Product'),
149
+ "url": f"Productimages/{urllib.parse.quote(match)}",
150
+ "score": score
151
+ })
152
+
153
+ # Keep results strictly sorted by AI confidence
154
+ valid_results.sort(key=lambda x: x['score'], reverse=True)
155
 
156
+ return {"results": valid_results[:20]} # Return top 20 verified matches
157
  except Exception as e:
158
  logger.error(f"Search error: {e}")
159
  return {"results": [], "error": str(e)}