File size: 9,210 Bytes
a52126c
 
989a91c
846d6b0
213e539
7db5fb6
427116d
 
ca04ee5
4db3151
d80107f
 
86ccee3
d80107f
427116d
846d6b0
 
 
03e77d7
1c253ff
4db3151
36680f9
51115b7
36680f9
427116d
1c253ff
846d6b0
d80107f
ea04b7e
 
 
ca04ee5
ea04b7e
 
 
d80107f
427116d
6063744
427116d
6063744
7db5fb6
51115b7
 
7db5fb6
427116d
7db5fb6
36680f9
427116d
6063744
a52126c
6063744
03e77d7
 
427116d
 
 
 
 
36680f9
a2f4ac8
a52126c
427116d
03e77d7
 
 
846d6b0
989a91c
03e77d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd44eb0
4db3151
03e77d7
fd44eb0
846d6b0
 
4db3151
 
 
 
 
 
 
 
 
 
03e77d7
846d6b0
4db3151
 
 
6063744
03e77d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36680f9
 
 
 
427116d
6063744
 
4db3151
 
6063744
427116d
6063744
427116d
4db3151
 
a52126c
427116d
4db3151
427116d
 
 
 
 
 
4db3151
427116d
a52126c
4db3151
 
 
03e77d7
 
 
4db3151
6063744
 
 
d80107f
36680f9
d80107f
989a91c
6063744
427116d
 
36680f9
6063744
ea04b7e
427116d
36680f9
 
427116d
 
 
 
 
846d6b0
03e77d7
427116d
36680f9
 
 
03e77d7
36680f9
427116d
213e539
7db5fb6
427116d
 
7db5fb6
213e539
51115b7
427116d
03e77d7
427116d
51115b7
 
4db3151
 
 
7db5fb6
6063744
427116d
 
7db5fb6
4db3151
427116d
 
 
7db5fb6
6063744
 
427116d
 
4db3151
51115b7
427116d
51115b7
427116d
 
 
 
 
846d6b0
427116d
6063744
ea04b7e
427116d
 
36680f9
d80107f
ea04b7e
a52126c
03e77d7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import logging
import os
import shutil
import threading
import unicodedata
import urllib.parse
import uuid
import zipfile

import anyio
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub import snapshot_download

# 1. SETTINGS & PATHS
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hugging Face dataset repo holding the product images and the ChromaDB archive.
DATASET_REPO = "aniketkumar1106/orbit-data"
# Local directory served as static files; populated by background_sync().
IMAGE_DIR = "Productimages"
DB_TARGET_FOLDER = "orbiitt_db" # The folder ChromaDB expects
# Search hits scoring below this value are dropped from /search responses.
MIN_CONFIDENCE_THRESHOLD = 0.1

# BOOTSTRAP: Mandatory folder creation
# Must exist before app.mount() below, or StaticFiles raises at startup.
os.makedirs(IMAGE_DIR, exist_ok=True)

app = FastAPI()

# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether credentials
# are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# 2. GLOBAL STATE
engine = None  # set by background_sync() once the AI engine is loaded
loading_status = "System Booting..."  # human-readable boot progress, surfaced via /health

def normalize_filename(name):
    """Return *name* URL-decoded, with typographic punctuation mapped to
    ASCII and any remaining non-ASCII characters stripped.

    Empty or falsy input yields "".
    """
    if not name:
        return ""
    decoded = urllib.parse.unquote(name)
    # Map curly quotes/dashes to plain ASCII before the lossy encode below,
    # since NFKD alone would simply drop them.
    ascii_punct = str.maketrans({'’': "'", '‘': "'", '“': '"', '”': '"', '–': '-'})
    decoded = decoded.translate(ascii_punct)
    folded = unicodedata.normalize('NFKD', decoded)
    return folded.encode('ascii', 'ignore').decode('ascii').strip()

# 3. BACKGROUND INITIALIZATION
# File extensions treated as product images throughout the sync.
_IMAGE_EXTS = ('.png', '.jpg', '.jpeg', '.webp')

def _clear_image_dir():
    """Empty IMAGE_DIR so stale assets never survive a re-sync."""
    logger.info("Cleaning up old image directory...")
    for entry in os.listdir(IMAGE_DIR):
        path = os.path.join(IMAGE_DIR, entry)
        try:
            if os.path.isfile(path):
                os.unlink(path)
            elif os.path.isdir(path):
                shutil.rmtree(path)
        except OSError:
            # Best-effort: one stubborn entry must not abort the whole sync.
            pass

def _stage_image(src_dir, fname, overwrite):
    """Move one image file into IMAGE_DIR under a normalized name.

    Returns True if the file was moved. Failures are swallowed because a
    single bad file must not abort the sync.
    """
    src = os.path.join(src_dir, fname)
    dst = os.path.join(IMAGE_DIR, normalize_filename(fname))
    if not overwrite and os.path.exists(dst):
        return False
    try:
        shutil.move(src, dst)
        return True
    except (OSError, shutil.Error):
        return False

def _extract_image_zip():
    """Unpack Productimages.zip (if present) into IMAGE_DIR."""
    global loading_status
    if not os.path.exists("Productimages.zip"):
        return
    loading_status = "Extracting Images..."
    logger.info("Found Productimages.zip! Extracting...")
    with zipfile.ZipFile("Productimages.zip", 'r') as z:
        z.extractall("temp_images_zip")

    count_zip_images = 0
    for root, _dirs, files in os.walk("temp_images_zip"):
        for f in files:
            # Ignore hidden files (like __MACOSX residue).
            if f.startswith('.'):
                continue
            if f.lower().endswith(_IMAGE_EXTS):
                if _stage_image(root, f, overwrite=True):
                    count_zip_images += 1

    shutil.rmtree("temp_images_zip")
    logger.info(f"Extracted {count_zip_images} images from Productimages.zip")

def _extract_db_zip():
    """Unpack orbiitt_db.zip (if present) and install the ChromaDB folder."""
    global loading_status
    if not os.path.exists("orbiitt_db.zip"):
        return
    loading_status = "Extracting Database..."
    logger.info("Extracting orbiitt_db.zip...")
    with zipfile.ZipFile("orbiitt_db.zip", 'r') as z:
        z.extractall("temp_extract")

    db_found = False
    for root, _dirs, files in os.walk("temp_extract"):
        # chroma.sqlite3 is ChromaDB's signature file for a persistent store.
        if "chroma.sqlite3" in files:
            if os.path.exists(DB_TARGET_FOLDER):
                shutil.rmtree(DB_TARGET_FOLDER)
            shutil.move(root, DB_TARGET_FOLDER)
            db_found = True
            # BUGFIX: stop walking — the tree was just moved out from under
            # os.walk, and a second hit would clobber the installed DB.
            break

    shutil.rmtree("temp_extract")

    if not db_found and not os.path.exists(DB_TARGET_FOLDER):
        os.makedirs(DB_TARGET_FOLDER, exist_ok=True)

def _collect_loose_images():
    """Sweep images downloaded loose (outside any zip) into IMAGE_DIR."""
    for root, _dirs, files in os.walk("."):
        # Never pull from the target dir itself, git metadata, or the DB.
        if IMAGE_DIR in root or ".git" in root or DB_TARGET_FOLDER in root:
            continue
        for f in files:
            if f.startswith('.'):
                continue
            if f.lower().endswith(_IMAGE_EXTS):
                _stage_image(root, f, overwrite=False)

# 3. BACKGROUND INITIALIZATION
def background_sync():
    """Download dataset assets, extract images + ChromaDB, then boot the engine.

    Runs in a daemon thread started at app startup. Progress is published
    through the module-level `loading_status`; on success the module-level
    `engine` is set, which flips /health to ready.
    """
    global engine, loading_status
    token = os.environ.get("HF_TOKEN")

    # Cleanup old images to prevent duplicates or stale data.
    _clear_image_dir()

    try:
        loading_status = "Syncing Assets..."
        logger.info(f"Downloading dataset from {DATASET_REPO}...")

        # Download everything (including the zips) to the current directory.
        snapshot_download(repo_id=DATASET_REPO, repo_type="dataset", token=token, local_dir=".")

        # STEP A: image archive; STEP B: database archive; STEP C: leftovers.
        _extract_image_zip()
        _extract_db_zip()
        _collect_loose_images()

        # Log the final on-disk count for validation.
        final_count = len(os.listdir(IMAGE_DIR))
        logger.info(f"DISK VALIDATION: {final_count} images ready in {IMAGE_DIR}")

        loading_status = "Loading AI Engine..."
        try:
            from orbiitt_engine import OrbiittEngine
            # Initialize with the folder the DB was installed into above.
            engine = OrbiittEngine(db_path=f"./{DB_TARGET_FOLDER}")
            loading_status = "Ready"
            logger.info(">>> ENGINE ONLINE <<<")
        except Exception as e:
            loading_status = f"Engine Error: {str(e)}"
            # logger.exception keeps the traceback (logger.error dropped it).
            logger.exception("Engine Failed")

    except Exception as e:
        loading_status = f"Sync Error: {str(e)}"
        logger.exception("Sync Failed")

@app.on_event("startup")
async def startup_event():
    """Kick off the dataset sync in a daemon thread so the server can accept
    requests (and health checks) while assets are still downloading."""
    # NOTE(review): @app.on_event is deprecated in recent FastAPI in favor of
    # lifespan handlers — works today; confirm before upgrading FastAPI.
    thread = threading.Thread(target=background_sync, daemon=True)
    thread.start()

# Mount Static Images
# Files staged into IMAGE_DIR by background_sync() become reachable at
# /Productimages/<name>, matching the URLs built by the /search endpoint.
app.mount("/Productimages", StaticFiles(directory=IMAGE_DIR), name="Productimages")

# Serve UI (Root Endpoint)
@app.get("/")
async def read_index():
    """Serve the single-page UI, or a JSON liveness message if it is absent."""
    if not os.path.exists('index.html'):
        return {"status": "Online", "message": "index.html not found, but server is running."}
    return FileResponse('index.html')

@app.get("/health")
def health():
    """Report boot progress and whether the search engine is ready."""
    is_ready = engine is not None
    return {"status": loading_status, "ready": is_ready}

# 4. FIXED SEARCH LOGIC
@app.post("/search")
async def search(text: str = Form(None), weight: float = Form(0.5), file: UploadFile = File(None)):
    """Hybrid text/image search.

    Accepts optional form text, an optional uploaded image, and a text/image
    blend weight. Returns up to 20 results whose files exist on disk, sorted
    by score descending. Returns 503 while the engine is still loading.
    """
    if not engine:
        raise HTTPException(status_code=503, detail=f"Engine not ready: {loading_status}")

    # BUGFIX: the old per-process name (os.getpid()) was identical for all
    # concurrent requests in one worker, so uploads clobbered each other and
    # `finally` could delete another request's buffer. Unique per request now.
    t_path = f"buffer_{uuid.uuid4().hex}.jpg" if file else None

    try:
        # Infer the effective blend: image-only -> 0.0, text-only -> 1.0.
        actual_weight = weight
        if not text and file: actual_weight = 0.0
        if text and not file: actual_weight = 1.0

        if file and t_path:
            content = await file.read()
            async with await anyio.open_file(t_path, "wb") as f:
                await f.write(content)

        # Run the blocking engine call off the event loop (no top_k param).
        results = await anyio.to_thread.run_sync(
            lambda: engine.search(
                text_query=text,
                image_file=t_path,
                text_weight=actual_weight
            )
        )

        all_files = os.listdir(IMAGE_DIR)
        final_list = []
        seen_ids = set()

        for r in results:
            score = r.get('score', 0)
            pid = r.get('id', 'Product')

            if score < MIN_CONFIDENCE_THRESHOLD or pid in seen_ids:
                continue

            # The engine returns a clean ID/path; match it to a file on disk.
            fname_from_db = os.path.basename(r.get('id', ''))
            fname = normalize_filename(fname_from_db)

            match = None
            if fname in all_files:
                match = fname
            elif fname:
                # BUGFIX: guard against empty fname — '"" in s' is always
                # True and would have matched an arbitrary file.
                # Fuzzy prefix fallback when the exact name differs on disk.
                prefix = fname[:15].lower()
                for disk_f in all_files:
                    if prefix in disk_f.lower():
                        match = disk_f
                        break

            if match:
                final_list.append({
                    "id": pid,
                    # Ensure URL is properly encoded for the web.
                    "url": f"Productimages/{urllib.parse.quote(match)}",
                    "score": round(float(score), 4)
                })
                seen_ids.add(pid)

        final_list.sort(key=lambda x: x['score'], reverse=True)
        return {"results": final_list[:20]}

    except Exception as e:
        # logger.exception keeps the traceback (logger.error dropped it).
        logger.exception("Search Failure")
        return {"results": [], "error": str(e)}
    finally:
        if t_path and os.path.exists(t_path):
            try:
                os.remove(t_path)
            except OSError:
                pass

if __name__ == "__main__":
    import uvicorn
    # FIXED: Listen on all interfaces (0.0.0.0) and correct port 7860
    # (7860 is the conventional Hugging Face Spaces port).
    uvicorn.run(app, host="0.0.0.0", port=7860)