rairo committed on
Commit
bfe1c73
·
verified ·
1 Parent(s): 06fc015

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -10
main.py CHANGED
@@ -24,9 +24,10 @@ logger = logging.getLogger(__name__)
24
  SYLLABI_DIR = "syllabi"
25
  PAST_EXAMS_DIR = "past_exams"
26
 
27
- GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 
28
  EMBEDDING_MODEL = "models/text-embedding-004"
29
- VISION_MODEL = "gemini-2.5-flash"
30
 
31
  # ---------------------------------------------------------------------------
32
  # COMPLETE SUBJECT REGISTRY (all 24 PDFs on HuggingFace)
@@ -466,13 +467,25 @@ def load_index_from_firebase():
466
  if not fb_vectors: return False
467
  VECTOR_DB = []
468
  valid = []
469
- for entry in (fb_vectors.values() if isinstance(fb_vectors, dict) else fb_vectors):
470
- if not entry: continue
471
- vec = np.array(entry["vector"])
472
- VECTOR_DB.append({"vector": vec, "meta": entry["meta"]})
473
- valid.append(vec)
 
 
 
 
 
 
 
 
 
 
 
474
  if valid:
475
- VECTOR_MATRIX = np.vstack(valid)
 
476
 
477
  fb_exams = fb_get("data_api/exams")
478
  if fb_exams:
@@ -695,8 +708,13 @@ def search():
695
  resp = c.models.embed_content(model=EMBEDDING_MODEL, contents=q)
696
  qv = np.array(resp.embeddings[0].values).reshape(1, -1)
697
  except Exception as e:
698
- return jsonify({"error": str(e)}), 500
699
- scores = cosine_similarity(qv, VECTOR_MATRIX)[0]
 
 
 
 
 
700
  results = []
701
  for idx in np.argsort(scores)[::-1]:
702
  if scores[idx] < 0.3: break
 
24
  SYLLABI_DIR = "syllabi"
25
  PAST_EXAMS_DIR = "past_exams"
26
 
27
+ # Support both naming conventions (HuggingFace Space may use either)
28
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("Gemini")
29
  EMBEDDING_MODEL = "models/text-embedding-004"
30
+ VISION_MODEL = "gemini-2.0-flash"
31
 
32
  # ---------------------------------------------------------------------------
33
  # COMPLETE SUBJECT REGISTRY (all 24 PDFs on HuggingFace)
 
467
  if not fb_vectors: return False
468
  VECTOR_DB = []
469
  valid = []
470
+ expected_dim = 768 # text-embedding-004 output dimension
471
+ for entry in sorted(fb_vectors.keys() if isinstance(fb_vectors, dict) else range(len(fb_vectors))):
472
+ item = fb_vectors[entry] if isinstance(fb_vectors, dict) else fb_vectors[entry]
473
+ if not item: continue
474
+ raw_vec = item.get("vector")
475
+ if not raw_vec: continue
476
+ try:
477
+ vec = np.array(raw_vec, dtype=np.float32)
478
+ if vec.ndim != 1 or len(vec) != expected_dim:
479
+ logger.warning(f"Skipping vector with wrong shape: {vec.shape}")
480
+ continue
481
+ VECTOR_DB.append({"vector": vec, "meta": item["meta"]})
482
+ valid.append(vec)
483
+ except Exception as ve:
484
+ logger.warning(f"Skipping malformed vector entry: {ve}")
485
+ continue
486
  if valid:
487
+ VECTOR_MATRIX = np.vstack(valid).astype(np.float32)
488
+ logger.info(f"Vector matrix shape: {VECTOR_MATRIX.shape if VECTOR_MATRIX is not None else None}")
489
 
490
  fb_exams = fb_get("data_api/exams")
491
  if fb_exams:
 
708
  resp = c.models.embed_content(model=EMBEDDING_MODEL, contents=q)
709
  qv = np.array(resp.embeddings[0].values).reshape(1, -1)
710
  except Exception as e:
711
+ logger.error(f"Embed query failed: {e}")
712
+ return jsonify({"error": f"Embedding failed: {str(e)}"}), 500
713
+ try:
714
+ scores = cosine_similarity(qv, VECTOR_MATRIX)[0]
715
+ except Exception as e:
716
+ logger.error(f"Cosine similarity failed: {e}, matrix shape: {VECTOR_MATRIX.shape if VECTOR_MATRIX is not None else None}, query shape: {qv.shape}")
717
+ return jsonify({"error": f"Search index error: {str(e)}"}), 500
718
  results = []
719
  for idx in np.argsort(scores)[::-1]:
720
  if scores[idx] < 0.3: break