Spaces:
Runtime error
Runtime error
File size: 9,210 Bytes
a52126c 989a91c 846d6b0 213e539 7db5fb6 427116d ca04ee5 4db3151 d80107f 86ccee3 d80107f 427116d 846d6b0 03e77d7 1c253ff 4db3151 36680f9 51115b7 36680f9 427116d 1c253ff 846d6b0 d80107f ea04b7e ca04ee5 ea04b7e d80107f 427116d 6063744 427116d 6063744 7db5fb6 51115b7 7db5fb6 427116d 7db5fb6 36680f9 427116d 6063744 a52126c 6063744 03e77d7 427116d 36680f9 a2f4ac8 a52126c 427116d 03e77d7 846d6b0 989a91c 03e77d7 fd44eb0 4db3151 03e77d7 fd44eb0 846d6b0 4db3151 03e77d7 846d6b0 4db3151 6063744 03e77d7 36680f9 427116d 6063744 4db3151 6063744 427116d 6063744 427116d 4db3151 a52126c 427116d 4db3151 427116d 4db3151 427116d a52126c 4db3151 03e77d7 4db3151 6063744 d80107f 36680f9 d80107f 989a91c 6063744 427116d 36680f9 6063744 ea04b7e 427116d 36680f9 427116d 846d6b0 03e77d7 427116d 36680f9 03e77d7 36680f9 427116d 213e539 7db5fb6 427116d 7db5fb6 213e539 51115b7 427116d 03e77d7 427116d 51115b7 4db3151 7db5fb6 6063744 427116d 7db5fb6 4db3151 427116d 7db5fb6 6063744 427116d 4db3151 51115b7 427116d 51115b7 427116d 846d6b0 427116d 6063744 ea04b7e 427116d 36680f9 d80107f ea04b7e a52126c 03e77d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
import logging
import os
import shutil
import threading
import unicodedata
import urllib.parse
import uuid
import zipfile

import anyio
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub import snapshot_download
# 1. SETTINGS & PATHS
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Hugging Face dataset repo holding the image zip and the ChromaDB zip.
DATASET_REPO = "aniketkumar1106/orbit-data"
# Local folder that background_sync() populates and the app serves statically.
IMAGE_DIR = "Productimages"
DB_TARGET_FOLDER = "orbiitt_db" # The folder ChromaDB expects
# Results scoring below this are dropped from /search responses.
MIN_CONFIDENCE_THRESHOLD = 0.1
# BOOTSTRAP: Mandatory folder creation
os.makedirs(IMAGE_DIR, exist_ok=True)
app = FastAPI()
# Open CORS so a UI hosted on another origin can call the API.
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True for credentialed requests -- confirm credentials
# are actually needed here.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# 2. GLOBAL STATE
# `engine` is published by the background sync thread once the DB is staged;
# `loading_status` is the human-readable progress string exposed via /health.
engine = None
loading_status = "System Booting..."
# Smart-punctuation -> ASCII replacements applied before the NFKD fold
# (the en dash would otherwise be dropped entirely by the ASCII encode).
_SMART_PUNCT = str.maketrans({'’': "'", '‘': "'", '“': '"', '”': '"', '–': '-'})


def normalize_filename(name):
    """Normalize a filename to plain, trimmed ASCII for on-disk matching.

    URL-decodes percent escapes, maps smart quotes/dashes to ASCII, folds
    accented characters via NFKD decomposition (dropping what cannot be
    represented in ASCII), and strips surrounding whitespace.
    Returns "" for falsy input.
    """
    if not name:
        return ""
    decoded = urllib.parse.unquote(name)
    plain = decoded.translate(_SMART_PUNCT)
    folded = unicodedata.normalize('NFKD', plain)
    return folded.encode('ascii', 'ignore').decode('ascii').strip()
# 3. BACKGROUND INITIALIZATION
def background_sync():
    """Download dataset assets, stage images and the ChromaDB, then boot the engine.

    Runs in a daemon thread started at app startup. Progress is reported to
    /health via the module-level `loading_status` string, and the engine is
    published via the module-level `engine` once it is ready.
    """
    global engine, loading_status
    token = os.environ.get("HF_TOKEN")

    _purge_image_dir()

    try:
        loading_status = "Syncing Assets..."
        logger.info(f"Downloading dataset from {DATASET_REPO}...")
        # Download everything (including the new zip) to current directory
        snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")

        _extract_image_zip()
        _extract_db_zip()
        _sweep_loose_images()

        # LOGGING FILE COUNT FOR VALIDATION
        final_count = len(os.listdir(IMAGE_DIR))
        logger.info(f"DISK VALIDATION: {final_count} images ready in {IMAGE_DIR}")

        loading_status = "Loading AI Engine..."
        try:
            from orbiitt_engine import OrbiittEngine
            # Initialize with the folder the DB zip was staged into above.
            engine = OrbiittEngine(db_path=f"./{DB_TARGET_FOLDER}")
            loading_status = "Ready"
            logger.info(">>> ENGINE ONLINE <<<")
        except Exception as e:
            loading_status = f"Engine Error: {str(e)}"
            logger.error(f"Engine Failed: {e}")
    except Exception as e:
        loading_status = f"Sync Error: {str(e)}"
        logger.error(f"Sync Failed: {e}")


def _purge_image_dir():
    """Best-effort removal of everything inside IMAGE_DIR (stale/duplicate data)."""
    logger.info("Cleaning up old image directory...")
    for entry in os.listdir(IMAGE_DIR):
        path = os.path.join(IMAGE_DIR, entry)
        try:
            if os.path.isfile(path):
                os.unlink(path)
            elif os.path.isdir(path):
                shutil.rmtree(path)
        except OSError:
            # Best-effort: one busy/locked entry must not abort the sync.
            logger.warning(f"Could not remove {path}")


def _extract_image_zip():
    """If Productimages.zip exists, unzip it and move images into IMAGE_DIR."""
    global loading_status
    if not os.path.exists("Productimages.zip"):
        return
    loading_status = "Extracting Images..."
    logger.info("Found Productimages.zip! Extracting...")
    # Extract to a temp folder first, then move so names get normalized.
    with zipfile.ZipFile("Productimages.zip", 'r') as z:
        z.extractall("temp_images_zip")
    count_zip_images = 0
    for root, _dirs, files in os.walk("temp_images_zip"):
        for fname in files:
            # Ignore hidden files (like __MACOSX resource forks).
            if fname.startswith('.'):
                continue
            if fname.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
                src = os.path.join(root, fname)
                dst = os.path.join(IMAGE_DIR, normalize_filename(fname))
                try:
                    # Move and overwrite if necessary.
                    shutil.move(src, dst)
                    count_zip_images += 1
                except (OSError, shutil.Error):
                    logger.warning(f"Could not move {src}")
    shutil.rmtree("temp_images_zip")
    logger.info(f"Extracted {count_zip_images} images from Productimages.zip")


def _extract_db_zip():
    """If orbiitt_db.zip exists, unzip it and stage the DB at DB_TARGET_FOLDER."""
    global loading_status
    if not os.path.exists("orbiitt_db.zip"):
        return
    loading_status = "Extracting Database..."
    logger.info("Extracting orbiitt_db.zip...")
    with zipfile.ZipFile("orbiitt_db.zip", 'r') as z:
        z.extractall("temp_extract")
    # Smart extraction: locate the ChromaDB folder by its signature file.
    db_found = False
    for root, _dirs, files in os.walk("temp_extract"):
        if "chroma.sqlite3" in files:
            if os.path.exists(DB_TARGET_FOLDER):
                shutil.rmtree(DB_TARGET_FOLDER)
            # Move the directory containing the sqlite3 file to our target.
            shutil.move(root, DB_TARGET_FOLDER)
            db_found = True
            # Stop here: continuing the walk over a tree we just moved out
            # from under os.walk would silently fail or misbehave.
            break
    shutil.rmtree("temp_extract")
    if not db_found and not os.path.exists(DB_TARGET_FOLDER):
        os.makedirs(DB_TARGET_FOLDER, exist_ok=True)


def _sweep_loose_images():
    """Catch any images downloaded loose (outside the zip) and move them too."""
    skip_names = {IMAGE_DIR, DB_TARGET_FOLDER, ".git"}
    for root, _dirs, files in os.walk("."):
        # Compare whole path components: a substring test ("IMAGE_DIR in root")
        # would wrongly skip any sibling folder whose name merely contains one
        # of these strings.
        parts = set(os.path.normpath(root).split(os.sep))
        if parts & skip_names:
            continue
        for fname in files:
            if fname.startswith('.'):
                continue
            if not fname.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
                continue
            src = os.path.join(root, fname)
            dst = os.path.join(IMAGE_DIR, normalize_filename(fname))
            # Zip-extracted images win: only fill gaps, never overwrite.
            if not os.path.exists(dst):
                try:
                    shutil.move(src, dst)
                except (OSError, shutil.Error):
                    pass  # best-effort; leave the loose file where it is
@app.on_event("startup")
async def startup_event():
    """Kick off the dataset sync + engine boot without blocking server startup."""
    # Daemon thread so a hung sync never prevents process shutdown.
    threading.Thread(target=background_sync, daemon=True).start()
# Mount Static Images
# Safe at import time: IMAGE_DIR is created unconditionally at module top.
app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimages")
# Serve UI (Root Endpoint)
@app.get("/")
async def read_index():
    """Serve the bundled index.html, or a JSON liveness note if it is absent."""
    if not os.path.exists('index.html'):
        return {"status": "Online", "message": "index.html not found, but server is running."}
    return FileResponse('index.html')
@app.get("/health")
def health():
    """Report sync progress and whether the search engine is usable yet."""
    engine_ready = engine is not None
    return {"status": loading_status, "ready": engine_ready}
# 4. FIXED SEARCH LOGIC
@app.post("/search")
async def search(text: str = Form(None), weight: float = Form(0.5), file: UploadFile = File(None)):
    """Hybrid text/image product search.

    Accepts an optional text query and/or an uploaded image, runs the engine
    search off the event loop, then maps each result id to an actual file in
    IMAGE_DIR (exact normalized match first, fuzzy prefix fallback second).
    Returns up to 20 results sorted by score, each with id, url and score.
    Raises 503 while the engine is still loading.
    """
    if not engine:
        raise HTTPException(status_code=503, detail=f"Engine not ready: {loading_status}")
    # Unique per-request buffer name: a PID-based name collides when two
    # uploads are handled concurrently inside the same worker process.
    t_path = f"buffer_{uuid.uuid4().hex}.jpg" if file else None
    try:
        # If only one modality was supplied, push the weight fully onto it.
        actual_weight = weight
        if not text and file:
            actual_weight = 0.0
        if text and not file:
            actual_weight = 1.0
        if file and t_path:
            content = await file.read()
            async with await anyio.open_file(t_path, "wb") as f:
                await f.write(content)
        # Engine call is blocking; run it in a worker thread. (No top_k:
        # the engine's search signature does not accept it.)
        results = await anyio.to_thread.run_sync(
            lambda: engine.search(
                text_query=text,
                image_file=t_path,
                text_weight=actual_weight
            )
        )
        all_files = os.listdir(IMAGE_DIR)
        disk_set = set(all_files)  # O(1) exact-match lookups below
        final_list = []
        seen_ids = set()
        for r in results:
            score = r.get('score', 0)
            pid = r.get('id', 'Product')
            if score < MIN_CONFIDENCE_THRESHOLD or pid in seen_ids:
                continue
            # The engine returns an id/path; normalize it to match disk names.
            fname = normalize_filename(os.path.basename(r.get('id', '')))
            match = None
            if fname in disk_set:
                match = fname
            elif fname:
                # Fuzzy prefix fallback. Guarded on non-empty fname: an empty
                # prefix ('' in s is always True) would "match" the first
                # arbitrary file on disk.
                prefix = fname[:15].lower()
                match = next((d for d in all_files if prefix in d.lower()), None)
            if match:
                final_list.append({
                    "id": pid,
                    # Ensure URL is properly encoded for web
                    "url": f"Productimages/{urllib.parse.quote(match)}",
                    "score": round(float(score), 4)
                })
                seen_ids.add(pid)
        final_list.sort(key=lambda x: x['score'], reverse=True)
        return {"results": final_list[:20]}
    except Exception as e:
        logger.error(f"Search Failure: {e}")
        return {"results": [], "error": str(e)}
    finally:
        if t_path and os.path.exists(t_path):
            try:
                os.remove(t_path)
            except OSError:
                pass  # leaked temp file is harmless; do not mask the response
# Local entry point (production runs via the Space's own launcher).
if __name__ == "__main__":
    import uvicorn
    # FIXED: Listen on all interfaces (0.0.0.0) and correct port 7860
    uvicorn.run(app, host="0.0.0.0", port=7860)