Spaces:

rairo
/

marka-data-api

Running

App Files Community

rairo committed on Apr 20

Commit

bfe1c73

verified ·

1 Parent(s): 06fc015

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -10

main.py CHANGED Viewed

@@ -24,9 +24,10 @@ logger = logging.getLogger(__name__)
 SYLLABI_DIR    = "syllabi"
 PAST_EXAMS_DIR = "past_exams"
-GEMINI_API_KEY  = os.environ.get("GEMINI_API_KEY")
 EMBEDDING_MODEL = "models/text-embedding-004"
-VISION_MODEL    = "gemini-2.5-flash"
 # ---------------------------------------------------------------------------
 # COMPLETE SUBJECT REGISTRY  (all 24 PDFs on HuggingFace)
@@ -466,13 +467,25 @@ def load_index_from_firebase():
         if not fb_vectors: return False
         VECTOR_DB  = []
         valid      = []
-        for entry in (fb_vectors.values() if isinstance(fb_vectors, dict) else fb_vectors):
-            if not entry: continue
-            vec = np.array(entry["vector"])
-            VECTOR_DB.append({"vector": vec, "meta": entry["meta"]})
-            valid.append(vec)
         if valid:
-            VECTOR_MATRIX = np.vstack(valid)
         fb_exams = fb_get("data_api/exams")
         if fb_exams:
@@ -695,8 +708,13 @@ def search():
         resp = c.models.embed_content(model=EMBEDDING_MODEL, contents=q)
         qv   = np.array(resp.embeddings[0].values).reshape(1, -1)
     except Exception as e:
-        return jsonify({"error": str(e)}), 500
-    scores  = cosine_similarity(qv, VECTOR_MATRIX)[0]
     results = []
     for idx in np.argsort(scores)[::-1]:
         if scores[idx] < 0.3: break

 SYLLABI_DIR    = "syllabi"
 PAST_EXAMS_DIR = "past_exams"
+# Support both naming conventions (HuggingFace Space may use either)
+GEMINI_API_KEY  = os.environ.get("GEMINI_API_KEY") or os.environ.get("Gemini")
 EMBEDDING_MODEL = "models/text-embedding-004"
+VISION_MODEL    = "gemini-2.0-flash"
 # ---------------------------------------------------------------------------
 # COMPLETE SUBJECT REGISTRY  (all 24 PDFs on HuggingFace)
         if not fb_vectors: return False
         VECTOR_DB  = []
         valid      = []
+        expected_dim = 768  # text-embedding-004 output dimension
+        for entry in sorted(fb_vectors.keys() if isinstance(fb_vectors, dict) else range(len(fb_vectors))):
+            item = fb_vectors[entry] if isinstance(fb_vectors, dict) else fb_vectors[entry]
+            if not item: continue
+            raw_vec = item.get("vector")
+            if not raw_vec: continue
+            try:
+                vec = np.array(raw_vec, dtype=np.float32)
+                if vec.ndim != 1 or len(vec) != expected_dim:
+                    logger.warning(f"Skipping vector with wrong shape: {vec.shape}")
+                    continue
+                VECTOR_DB.append({"vector": vec, "meta": item["meta"]})
+                valid.append(vec)
+            except Exception as ve:
+                logger.warning(f"Skipping malformed vector entry: {ve}")
+                continue
         if valid:
+            VECTOR_MATRIX = np.vstack(valid).astype(np.float32)
+        logger.info(f"Vector matrix shape: {VECTOR_MATRIX.shape if VECTOR_MATRIX is not None else None}")
         fb_exams = fb_get("data_api/exams")
         if fb_exams:
         resp = c.models.embed_content(model=EMBEDDING_MODEL, contents=q)
         qv   = np.array(resp.embeddings[0].values).reshape(1, -1)
     except Exception as e:
+        logger.error(f"Embed query failed: {e}")
+        return jsonify({"error": f"Embedding failed: {str(e)}"}), 500
+    try:
+        scores  = cosine_similarity(qv, VECTOR_MATRIX)[0]
+    except Exception as e:
+        logger.error(f"Cosine similarity failed: {e}, matrix shape: {VECTOR_MATRIX.shape if VECTOR_MATRIX is not None else None}, query shape: {qv.shape}")
+        return jsonify({"error": f"Search index error: {str(e)}"}), 500
     results = []
     for idx in np.argsort(scores)[::-1]:
         if scores[idx] < 0.3: break