Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -24,9 +24,10 @@ logger = logging.getLogger(__name__)
|
|
| 24 |
SYLLABI_DIR = "syllabi"
|
| 25 |
PAST_EXAMS_DIR = "past_exams"
|
| 26 |
|
| 27 |
-
|
|
|
|
| 28 |
EMBEDDING_MODEL = "models/text-embedding-004"
|
| 29 |
-
VISION_MODEL = "gemini-2.
|
| 30 |
|
| 31 |
# ---------------------------------------------------------------------------
|
| 32 |
# COMPLETE SUBJECT REGISTRY (all 24 PDFs on HuggingFace)
|
|
@@ -466,13 +467,25 @@ def load_index_from_firebase():
|
|
| 466 |
if not fb_vectors: return False
|
| 467 |
VECTOR_DB = []
|
| 468 |
valid = []
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
if valid:
|
| 475 |
-
VECTOR_MATRIX = np.vstack(valid)
|
|
|
|
| 476 |
|
| 477 |
fb_exams = fb_get("data_api/exams")
|
| 478 |
if fb_exams:
|
|
@@ -695,8 +708,13 @@ def search():
|
|
| 695 |
resp = c.models.embed_content(model=EMBEDDING_MODEL, contents=q)
|
| 696 |
qv = np.array(resp.embeddings[0].values).reshape(1, -1)
|
| 697 |
except Exception as e:
|
| 698 |
-
|
| 699 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 700 |
results = []
|
| 701 |
for idx in np.argsort(scores)[::-1]:
|
| 702 |
if scores[idx] < 0.3: break
|
|
|
|
| 24 |
SYLLABI_DIR = "syllabi"
|
| 25 |
PAST_EXAMS_DIR = "past_exams"
|
| 26 |
|
| 27 |
+
# Read the API key from either env var name, GEMINI_API_KEY or Gemini (the HuggingFace Space may use either)
|
| 28 |
+
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("Gemini")
|
| 29 |
EMBEDDING_MODEL = "models/text-embedding-004"
|
| 30 |
+
VISION_MODEL = "gemini-2.0-flash"
|
| 31 |
|
| 32 |
# ---------------------------------------------------------------------------
|
| 33 |
# COMPLETE SUBJECT REGISTRY (all 24 PDFs on HuggingFace)
|
|
|
|
| 467 |
if not fb_vectors: return False
|
| 468 |
VECTOR_DB = []
|
| 469 |
valid = []
|
| 470 |
+
expected_dim = 768 # text-embedding-004 output dimension
|
| 471 |
+
for entry in sorted(fb_vectors.keys() if isinstance(fb_vectors, dict) else range(len(fb_vectors))):
|
| 472 |
+
item = fb_vectors[entry] if isinstance(fb_vectors, dict) else fb_vectors[entry]
|
| 473 |
+
if not item: continue
|
| 474 |
+
raw_vec = item.get("vector")
|
| 475 |
+
if not raw_vec: continue
|
| 476 |
+
try:
|
| 477 |
+
vec = np.array(raw_vec, dtype=np.float32)
|
| 478 |
+
if vec.ndim != 1 or len(vec) != expected_dim:
|
| 479 |
+
logger.warning(f"Skipping vector with wrong shape: {vec.shape}")
|
| 480 |
+
continue
|
| 481 |
+
VECTOR_DB.append({"vector": vec, "meta": item["meta"]})
|
| 482 |
+
valid.append(vec)
|
| 483 |
+
except Exception as ve:
|
| 484 |
+
logger.warning(f"Skipping malformed vector entry: {ve}")
|
| 485 |
+
continue
|
| 486 |
if valid:
|
| 487 |
+
VECTOR_MATRIX = np.vstack(valid).astype(np.float32)
|
| 488 |
+
logger.info(f"Vector matrix shape: {VECTOR_MATRIX.shape if VECTOR_MATRIX is not None else None}")
|
| 489 |
|
| 490 |
fb_exams = fb_get("data_api/exams")
|
| 491 |
if fb_exams:
|
|
|
|
| 708 |
resp = c.models.embed_content(model=EMBEDDING_MODEL, contents=q)
|
| 709 |
qv = np.array(resp.embeddings[0].values).reshape(1, -1)
|
| 710 |
except Exception as e:
|
| 711 |
+
logger.error(f"Embed query failed: {e}")
|
| 712 |
+
return jsonify({"error": f"Embedding failed: {str(e)}"}), 500
|
| 713 |
+
try:
|
| 714 |
+
scores = cosine_similarity(qv, VECTOR_MATRIX)[0]
|
| 715 |
+
except Exception as e:
|
| 716 |
+
logger.error(f"Cosine similarity failed: {e}, matrix shape: {VECTOR_MATRIX.shape if VECTOR_MATRIX is not None else None}, query shape: {qv.shape}")
|
| 717 |
+
return jsonify({"error": f"Search index error: {str(e)}"}), 500
|
| 718 |
results = []
|
| 719 |
for idx in np.argsort(scores)[::-1]:
|
| 720 |
if scores[idx] < 0.3: break
|