Spaces:

Shubham-10000
/

RAG-enabled-AI-assistant

Sleeping

App Files Files Community

Shubham 10000 committed on Oct 26, 2025

Commit

bf11009

1 Parent(s): a3517b2

version 2.0 storage file & requirement changes for sentence-transformers

Browse files

Files changed (2) hide show

requirements.txt +2 -1
storage.py +175 -99

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 streamlit>=1.20.0
 requests>=2.28.0
 pypdf>=3.0.0
-numpy>=1.23.0

 streamlit>=1.20.0
 requests>=2.28.0
 pypdf>=3.0.0
+numpy>=1.23.0
+sentence-transformers>=2.2.2

storage.py CHANGED Viewed

@@ -13,16 +13,11 @@ logging.basicConfig(level=logging.INFO)
 class VectorIndex:
     """
-    Lightweight vector index using Hugging Face Inference API for embeddings
-    and NumPy for similarity search. Supports .pdf and .txt uploads.
-    Stores data under a runtime directory (ephemeral in Spaces).
-    To operate:
-      - Set HF_HUB_TOKEN as a Space Secret (recommended) or env var.
-      - Default embedding model: "sentence-transformers/all-MiniLM-L6-v2".
-      - If the HF model is not accessible from the Inference API, either pick a public model
-        that supports embeddings or enable local fallback (use_local_fallback=True) and
-        install sentence-transformers in requirements.txt.
     """
     def __init__(
@@ -61,6 +56,7 @@ class VectorIndex:
             if os.path.exists(self.emb_path):
                 self.embeddings = np.load(self.emb_path)
             if self.embeddings is None:
                 self.embeddings = np.zeros((0, 384), dtype=np.float32)
             logger.info(f"Loaded store: {len(self.doc_store)} chunks")
         except Exception as e:
@@ -111,93 +107,177 @@ class VectorIndex:
         return chunks
     # ---------------- embeddings via HF Inference API ---------------- #
-    def _get_embeddings_api(self, texts: List[str]) -> List[List[float]]:
         """
-        Use HF Inference API at /models/{model} endpoint.
-        Returns list of vectors for each input text.
-        Raises RuntimeError containing HF response body for easy debugging.
         """
         import requests
         model_path = self.embedding_model
-        # Use new models endpoint (more robust than pipeline/... path)
         url = f"https://api-inference.huggingface.co/models/{model_path}"
         headers = {"Content-Type": "application/json"}
         if self.hf_token:
             headers["Authorization"] = f"Bearer {self.hf_token}"
-        payload = {"inputs": texts}
-        try:
-            resp = requests.post(url, headers=headers, json=payload, timeout=90)
-            # Log and surface HF error responses (status >= 400)
-            if resp.status_code >= 400:
-                body_text = None
                 try:
-                    body_text = resp.json()
-                except Exception:
-                    body_text = resp.text
-                logger.error(f"HF Inference error: status={resp.status_code}, body={body_text}")
-                if resp.status_code == 403:
-                    if self.hf_token:
-                        raise RuntimeError(
-                            f"HF Inference API 403 Forbidden. Response: {body_text}. "
-                            "Your HF_HUB_TOKEN may be invalid/expired or lacks model access. "
-                            "Check the token in Space Secrets and the model permissions."
-                        )
-                    else:
-                        raise RuntimeError(
-                            f"HF Inference API 403 Forbidden. Response: {body_text}. "
-                            "Unauthenticated requests may be blocked for this model. Add HF_HUB_TOKEN to Space Secrets."
-                        )
-                if resp.status_code == 404:
-                    raise RuntimeError(
-                        f"HF Inference API 404 Not Found. Response: {body_text}. Model path may be incorrect: '{model_path}'. "
-                        "Try a public model or verify the model id."
-                    )
-                raise RuntimeError(f"HF Inference API HTTP {resp.status_code}. Response: {body_text}")
-            data = resp.json()
-            vectors = []
-            # Normalize possible returned shapes: pooled vector or token vectors
-            for item in data:
-                if isinstance(item, list) and item and all(isinstance(x, (int, float)) for x in item):
-                    vectors.append(item)
-                elif isinstance(item, list) and item and isinstance(item[0], list):
-                    token_vecs = np.asarray(item, dtype=np.float32)
-                    if token_vecs.ndim == 2:
-                        avg = token_vecs.mean(axis=0).tolist()
-                        vectors.append(avg)
-                    else:
-                        vectors.append(token_vecs.flatten().tolist())
-                else:
-                    raise ValueError("Unexpected embedding format from HF Inference API")
-            if not vectors or len(vectors) != len(texts):
-                raise RuntimeError("Embeddings API returned unexpected number of vectors.")
-            return vectors
         except Exception as e:
-            # Attempt local fallback if enabled
-            if self.use_local_fallback:
-                try:
-                    from sentence_transformers import SentenceTransformer
-                except Exception as imp_err:
-                    raise RuntimeError(
-                        f"Embedding API failed: {e}. Local fallback requested but sentence-transformers is not installed: {imp_err}"
-                    )
-                try:
-                    local_model_name = model_path.split("sentence-transformers/")[-1]
-                    model = SentenceTransformer(local_model_name)
-                    emb = model.encode(texts, convert_to_numpy=True)
-                    return emb.tolist()
-                except Exception as local_e:
-                    raise RuntimeError(f"Embedding API failed: {e}. Local fallback also failed: {local_e}")
-            raise RuntimeError(
-                f"Embedding API failed: {e}. If you see a 403, check HF_HUB_TOKEN and model access. "
-                "Add HF_HUB_TOKEN to Space Secrets or enable a local fallback with sentence-transformers."
-            )
     # ---------------- index operations ---------------- #
     def add_file(self, file_path: str, source: str = "user-upload", metadata: dict = None) -> int:
@@ -233,15 +313,13 @@ class VectorIndex:
                         )
                     self.embeddings = np.vstack([self.embeddings, vecs])
                 for j, chunk in enumerate(batch):
-                    self.doc_store.append(
-                        {
-                            "chunk_id": str(uuid.uuid4()),
-                            "content": chunk,
-                            "source": source,
-                            "metadata": metadata or {},
-                            "vector_idx": len(self.doc_store),
-                        }
-                    )
                 added += len(batch)
             self._persist()
         logger.info(f"Added {added} chunks from {os.path.basename(file_path)}")
@@ -266,14 +344,12 @@ class VectorIndex:
             results = []
             for idx in idxs:
                 entry = self.doc_store[idx]
-                results.append(
-                    {
-                        "content": entry["content"],
-                        "metadata": entry.get("metadata", {}),
-                        "source": entry.get("source"),
-                        "score": float(sims[idx]),
-                    }
-                )
             return results
     def list_documents(self) -> List[Dict]:

 class VectorIndex:
     """
+    Robust VectorIndex for HF Inference API embeddings with multiple request shape fallbacks
+    and optional local sentence-transformers fallback.
+    Usage:
+      vi = VectorIndex(storage_dir="/tmp/vector_data", hf_token_env_value=HF_HUB_TOKEN, use_local_fallback=False)
     """
     def __init__(
             if os.path.exists(self.emb_path):
                 self.embeddings = np.load(self.emb_path)
             if self.embeddings is None:
+                # default shape if no embeddings yet
                 self.embeddings = np.zeros((0, 384), dtype=np.float32)
             logger.info(f"Loaded store: {len(self.doc_store)} chunks")
         except Exception as e:
         return chunks
     # ---------------- embeddings via HF Inference API ---------------- #
+    def _call_hf(self, url: str, headers: dict, payload) -> Dict:
         """
+        Helper to call HF Inference models endpoint and return (status_code, body).
         """
         import requests
+        resp = requests.post(url, headers=headers, json=payload, timeout=90)
+        # attempt to parse body
+        body = None
+        try:
+            body = resp.json()
+        except Exception:
+            body = resp.text
+        return {"status": resp.status_code, "body": body, "raw": resp}
+    def _parse_embedding_response(self, data, expected_len: int) -> List[List[float]]:
+        """
+        Parse known embedding shapes from HF response body into list-of-vectors.
+        Raises on unexpected formats.
+        """
+        vectors = []
+        # If the model returned a dict containing embeddings under some key, try to find them
+        if isinstance(data, dict):
+            # common key candidates
+            for key in ("embeddings", "embedding", "vectors", "array"):
+                if key in data:
+                    data = data[key]
+                    break
+        if isinstance(data, list):
+            # Case: list of vectors or list of token vectors per input
+            # If each item is a list of floats -> direct
+            if all(isinstance(item, list) and item and all(isinstance(x, (int, float)) for x in item) for item in data):
+                # Might be list-of-vectors for batch
+                if len(data) == expected_len:
+                    return [list(map(float, v)) for v in data]
+                # If returned token vectors for a single input, handle below
+            # If data is a nested list (token vectors), try averaging per item
+            # Try to coerce one vector per input
+            # Heuristic: if len(data) == expected_len and each entry is vector -> done
+            # If len(data) == 1 and expected_len >1, maybe API returned single vector for first input
+            # Fallback: if length mismatch but elements are lists of lists (token vectors), average them
+            out = []
+            for item in data:
+                if isinstance(item, list) and item and all(isinstance(x, (int, float)) for x in item):
+                    out.append([float(x) for x in item])
+                elif isinstance(item, list) and item and isinstance(item[0], list):
+                    arr = np.asarray(item, dtype=np.float32)
+                    if arr.ndim == 2:
+                        out.append(arr.mean(axis=0).tolist())
+                    else:
+                        out.append(arr.flatten().tolist())
+                else:
+                    # unknown item shape
+                    raise ValueError("Unexpected embedding item format")
+            if len(out) == expected_len:
+                return out
+            # If out length differs, but equals 1 and expected >1, maybe API returned pooled vector for all inputs -> broadcast
+            if len(out) == 1 and expected_len > 1:
+                return [out[0] for _ in range(expected_len)]
+            return out
+        raise ValueError("Unexpected embedding response format")
+    def _get_embeddings_api(self, texts: List[str]) -> List[List[float]]:
+        """
+        Robust embedding retrieval that attempts multiple request formats to handle different hosted pipeline types.
+        Tries:
+          1) batch inputs: {"inputs": texts}
+          2) per-text calls: {"inputs": single_text} for each text
+          3) similarity-style: {"inputs": {"sentences": texts}} or {"inputs": {"sentence": texts}}
+        If all fail and use_local_fallback=True, tries local sentence-transformers.
+        Surfaces HF response body in raised errors for debugging.
+        """
+        import requests  # local import for runtime environments
         model_path = self.embedding_model
         url = f"https://api-inference.huggingface.co/models/{model_path}"
         headers = {"Content-Type": "application/json"}
         if self.hf_token:
             headers["Authorization"] = f"Bearer {self.hf_token}"
+        attempts = []
+        # Attempt A: batch inputs (most common)
+        try:
+            payload = {"inputs": texts}
+            res = self._call_hf(url, headers, payload)
+            attempts.append(("batch", res))
+            if res["status"] < 400:
                 try:
+                    return self._parse_embedding_response(res["body"], len(texts))
+                except Exception as e:
+                    # parsing failed; proceed to next attempt
+                    logger.info(f"Batch parse failed: {e}")
+        except Exception as e:
+            logger.info(f"Batch request failed: {e}")
+        # Attempt B: single-item calls (some models only accept single input)
+        try:
+            per_vecs = []
+            ok = True
+            for t in texts:
+                payload = {"inputs": t}
+                res = self._call_hf(url, headers, payload)
+                attempts.append(("single", res))
+                if res["status"] >= 400:
+                    ok = False
+                    break
+                try:
+                    parsed = self._parse_embedding_response(res["body"], 1)
+                    per_vecs.extend(parsed)
+                except Exception as e:
+                    logger.info(f"Single parse failed for input: {e}")
+                    ok = False
+                    break
+            if ok and len(per_vecs) == len(texts):
+                return per_vecs
+        except Exception as e:
+            logger.info(f"Single-item requests failed: {e}")
+        # Attempt C: similarity-style payloads
+        try:
+            for key in ("sentences", "sentence", "texts"):
+                payload = {"inputs": {key: texts}}
+                res = self._call_hf(url, headers, payload)
+                attempts.append((f"key:{key}", res))
+                if res["status"] < 400:
+                    try:
+                        return self._parse_embedding_response(res["body"], len(texts))
+                    except Exception as e:
+                        logger.info(f"Parse after key {key} failed: {e}")
         except Exception as e:
+            logger.info(f"Similarity-key attempts failed: {e}")
+        # If reached here all HF attempts failed
+        # Build an informative error showing the attempts and last HF body if available
+        last_body = None
+        last_status = None
+        if attempts:
+            last_status = attempts[-1][1]["status"]
+            last_body = attempts[-1][1]["body"]
+        # Log all attempts for debugging
+        logger.error("HF embedding attempts failed. Attempts summary:")
+        for name, res in attempts:
+            logger.error(f"Attempt '{name}': status={res['status']}, body={res['body']}")
+        # Optional local fallback
+        if self.use_local_fallback:
+            try:
+                from sentence_transformers import SentenceTransformer
+            except Exception as imp_err:
+                raise RuntimeError(
+                    f"Embedding API failed (HF attempts). Last status={last_status}, body={last_body}. "
+                    f"Local fallback requested but sentence-transformers not installed: {imp_err}"
+                )
+            try:
+                local_model_name = model_path.split("sentence-transformers/")[-1]
+                model = SentenceTransformer(local_model_name)
+                emb = model.encode(texts, convert_to_numpy=True)
+                return emb.tolist()
+            except Exception as local_e:
+                raise RuntimeError(
+                    f"Embedding API failed (HF attempts). Last status={last_status}, body={last_body}. "
+                    f"Local fallback also failed: {local_e}"
+                )
+        # No fallback: raise with HF details
+        raise RuntimeError(
+            f"Embedding API failed after multiple request formats. Last status={last_status}, body={last_body}. "
+            "If you see 403, check HF_HUB_TOKEN and model access. Consider enabling local fallback with sentence-transformers."
+        )
     # ---------------- index operations ---------------- #
     def add_file(self, file_path: str, source: str = "user-upload", metadata: dict = None) -> int:
                         )
                     self.embeddings = np.vstack([self.embeddings, vecs])
                 for j, chunk in enumerate(batch):
+                    self.doc_store.append({
+                        "chunk_id": str(uuid.uuid4()),
+                        "content": chunk,
+                        "source": source,
+                        "metadata": metadata or {},
+                        "vector_idx": len(self.doc_store),
+                    })
                 added += len(batch)
             self._persist()
         logger.info(f"Added {added} chunks from {os.path.basename(file_path)}")
             results = []
             for idx in idxs:
                 entry = self.doc_store[idx]
+                results.append({
+                    "content": entry["content"],
+                    "metadata": entry.get("metadata", {}),
+                    "source": entry.get("source"),
+                    "score": float(sims[idx]),
+                })
             return results
     def list_documents(self) -> List[Dict]: