Alshargi committed on
Commit
bd907d1
·
verified ·
1 Parent(s): 89974fd

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +26 -25
api.py CHANGED
@@ -38,8 +38,8 @@ app = FastAPI(title="Hadith FAISS API", version="1.0")
38
 
39
  app.add_middleware(
40
  CORSMiddleware,
41
- allow_origins=["*"], # لاحقاً: استبدلها بدومين موقعك للأمان
42
- allow_credentials=False,
43
  allow_methods=["*"],
44
  allow_headers=["*"],
45
  )
@@ -92,15 +92,11 @@ def load_items():
92
  with open(JSON_PATH, "r", encoding="utf-8") as f:
93
  _items = json.load(f)
94
 
95
- if not isinstance(_items, list):
96
- raise RuntimeError("Dataset JSON root must be a list")
97
-
98
  _item_by_id = {}
99
  for it in _items:
100
  cid = it.get("corpusID")
101
- if cid is None:
102
- continue
103
- _item_by_id[int(cid)] = it
104
 
105
 
106
  def get_model() -> SentenceTransformer:
@@ -110,14 +106,19 @@ def get_model() -> SentenceTransformer:
110
  return _model
111
 
112
 
113
- def save_artifacts(index: faiss.Index, emb: np.ndarray, id_by_pos: List[int], pos_by_id: Dict[int, int]):
 
 
 
 
 
114
  ensure_dirs()
115
 
116
  faiss.write_index(index, INDEX_PATH)
117
- np.save(EMB_PATH, emb) # creates EMB_PATH + ".npy"
118
 
119
  with open(ID_BY_POS_PATH, "w", encoding="utf-8") as f:
120
- json.dump(id_by_pos, f, ensure_ascii=False)
121
 
122
  pos_by_id_str = {str(k): int(v) for k, v in pos_by_id.items()}
123
  with open(POS_BY_ID_PATH, "w", encoding="utf-8") as f:
@@ -128,7 +129,7 @@ def load_artifacts():
128
  global _index, _emb, _id_by_pos, _pos_by_id, _DIM
129
 
130
  _index = faiss.read_index(INDEX_PATH)
131
- _emb = np.load(EMB_PATH + ".npy").astype("float32", copy=False)
132
 
133
  with open(ID_BY_POS_PATH, "r", encoding="utf-8") as f:
134
  _id_by_pos = [int(x) for x in json.load(f)]
@@ -143,6 +144,7 @@ def load_artifacts():
143
  def build_all():
144
  """
145
  Build embeddings + FAISS then save.
 
146
  """
147
  global _index, _emb, _id_by_pos, _pos_by_id, _DIM
148
 
@@ -160,17 +162,11 @@ def build_all():
160
  )
161
  emb = np.asarray(emb, dtype="float32")
162
 
163
- dim = emb.shape[1]
164
  index = faiss.IndexFlatIP(dim) # cosine via IP since normalized
165
  index.add(emb)
166
 
167
- # Build ID mappings
168
- id_by_pos = []
169
- for x in _items:
170
- if "corpusID" not in x:
171
- raise RuntimeError("Each item must have corpusID")
172
- id_by_pos.append(int(x["corpusID"]))
173
-
174
  pos_by_id = {cid: i for i, cid in enumerate(id_by_pos)}
175
 
176
  save_artifacts(index, emb, id_by_pos, pos_by_id)
@@ -179,14 +175,14 @@ def build_all():
179
  _emb = emb
180
  _id_by_pos = id_by_pos
181
  _pos_by_id = pos_by_id
182
- _DIM = int(dim)
183
 
184
  dt = time.time() - t0
185
  print(f"[build_all] Built + saved artifacts in {dt:.2f}s. dim={_DIM}, n={len(_id_by_pos)}")
186
 
187
 
188
  def require_ready():
189
- if (not _READY) or (_index is None) or (_emb is None):
190
  raise HTTPException(status_code=503, detail="API is not ready yet. Try again in a moment.")
191
 
192
 
@@ -241,11 +237,17 @@ def on_startup():
241
  except Exception as e:
242
  _READY = False
243
  print("[startup] FAILED ❌", str(e))
 
244
 
245
 
246
  # -----------------------------
247
  # Routes
248
  # -----------------------------
 
 
 
 
 
249
  @app.get("/health")
250
  def health():
251
  return {
@@ -253,6 +255,7 @@ def health():
253
  "items": len(_items),
254
  "dim": _DIM,
255
  "has_artifacts": artifacts_exist(),
 
256
  }
257
 
258
 
@@ -263,7 +266,7 @@ def stats():
263
  "items": len(_items),
264
  "dim": _DIM,
265
  "index_type": type(_index).__name__,
266
- "model": MODEL_NAME,
267
  }
268
 
269
 
@@ -279,7 +282,6 @@ def get_item(corpus_id: int):
279
  @app.get("/similar/{corpus_id}")
280
  def similar(corpus_id: int, topk: int = 10):
281
  require_ready()
282
-
283
  cid = int(corpus_id)
284
  if cid not in _pos_by_id:
285
  raise HTTPException(status_code=404, detail="corpusID not found in index")
@@ -317,7 +319,6 @@ def similar(corpus_id: int, topk: int = 10):
317
  @app.post("/search")
318
  def search(req: SearchRequest):
319
  require_ready()
320
-
321
  q = (req.query or "").strip()
322
  if not q:
323
  raise HTTPException(status_code=400, detail="query is empty")
 
38
 
39
  app.add_middleware(
40
  CORSMiddleware,
41
+ allow_origins=["*"], # لو تبي تقفلها على دومين موقعك فقط قل لي
42
+ allow_credentials=True,
43
  allow_methods=["*"],
44
  allow_headers=["*"],
45
  )
 
92
  with open(JSON_PATH, "r", encoding="utf-8") as f:
93
  _items = json.load(f)
94
 
 
 
 
95
  _item_by_id = {}
96
  for it in _items:
97
  cid = it.get("corpusID")
98
+ if cid is not None:
99
+ _item_by_id[int(cid)] = it
 
100
 
101
 
102
  def get_model() -> SentenceTransformer:
 
106
  return _model
107
 
108
 
109
+ def save_artifacts(
110
+ index: faiss.Index,
111
+ emb: np.ndarray,
112
+ id_by_pos: List[int],
113
+ pos_by_id: Dict[int, int],
114
+ ):
115
  ensure_dirs()
116
 
117
  faiss.write_index(index, INDEX_PATH)
118
+ np.save(EMB_PATH, emb) # will create embeddings.npy
119
 
120
  with open(ID_BY_POS_PATH, "w", encoding="utf-8") as f:
121
+ json.dump([int(x) for x in id_by_pos], f, ensure_ascii=False)
122
 
123
  pos_by_id_str = {str(k): int(v) for k, v in pos_by_id.items()}
124
  with open(POS_BY_ID_PATH, "w", encoding="utf-8") as f:
 
129
  global _index, _emb, _id_by_pos, _pos_by_id, _DIM
130
 
131
  _index = faiss.read_index(INDEX_PATH)
132
+ _emb = np.load(EMB_PATH + ".npy").astype("float32")
133
 
134
  with open(ID_BY_POS_PATH, "r", encoding="utf-8") as f:
135
  _id_by_pos = [int(x) for x in json.load(f)]
 
144
  def build_all():
145
  """
146
  Build embeddings + FAISS then save.
147
+ This should run only if artifacts are missing.
148
  """
149
  global _index, _emb, _id_by_pos, _pos_by_id, _DIM
150
 
 
162
  )
163
  emb = np.asarray(emb, dtype="float32")
164
 
165
+ dim = int(emb.shape[1])
166
  index = faiss.IndexFlatIP(dim) # cosine via IP since normalized
167
  index.add(emb)
168
 
169
+ id_by_pos = [int(x["corpusID"]) for x in _items]
 
 
 
 
 
 
170
  pos_by_id = {cid: i for i, cid in enumerate(id_by_pos)}
171
 
172
  save_artifacts(index, emb, id_by_pos, pos_by_id)
 
175
  _emb = emb
176
  _id_by_pos = id_by_pos
177
  _pos_by_id = pos_by_id
178
+ _DIM = dim
179
 
180
  dt = time.time() - t0
181
  print(f"[build_all] Built + saved artifacts in {dt:.2f}s. dim={_DIM}, n={len(_id_by_pos)}")
182
 
183
 
184
  def require_ready():
185
+ if not _READY or _index is None or _emb is None:
186
  raise HTTPException(status_code=503, detail="API is not ready yet. Try again in a moment.")
187
 
188
 
 
237
  except Exception as e:
238
  _READY = False
239
  print("[startup] FAILED ❌", str(e))
240
+ # keep app up but not ready
241
 
242
 
243
  # -----------------------------
244
  # Routes
245
  # -----------------------------
246
+ @app.get("/")
247
+ def root():
248
+ return {"name": "Hadith FAISS API", "ready": _READY}
249
+
250
+
251
  @app.get("/health")
252
  def health():
253
  return {
 
255
  "items": len(_items),
256
  "dim": _DIM,
257
  "has_artifacts": artifacts_exist(),
258
+ "model": MODEL_NAME,
259
  }
260
 
261
 
 
266
  "items": len(_items),
267
  "dim": _DIM,
268
  "index_type": type(_index).__name__,
269
+ "topk_max": TOPK_MAX,
270
  }
271
 
272
 
 
282
  @app.get("/similar/{corpus_id}")
283
  def similar(corpus_id: int, topk: int = 10):
284
  require_ready()
 
285
  cid = int(corpus_id)
286
  if cid not in _pos_by_id:
287
  raise HTTPException(status_code=404, detail="corpusID not found in index")
 
319
  @app.post("/search")
320
  def search(req: SearchRequest):
321
  require_ready()
 
322
  q = (req.query or "").strip()
323
  if not q:
324
  raise HTTPException(status_code=400, detail="query is empty")