File size: 1,829 Bytes
e6a70ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# ─────────────────────────────────────────────────────────────
# utils/embedder.py
# Handles all embedding calls via your Gradio Space API
# ─────────────────────────────────────────────────────────────

import numpy as np
from gradio_client import Client
import os


class HFEmbedder:
    """
    Client for a Hugging Face Gradio Space that serves text embeddings.

    No local model download is needed — every embedding is obtained through
    a remote call to the Space, so the class works like an API wrapper.

    Attributes:
        space: Space identifier handed to ``gradio_client.Client``.
        api_name: Endpoint name handed to ``Client.predict`` on every call.
        client: The connected ``Client``; set by ``_connect``.
    """

    # Class-level fallback so an instance whose ``__init__`` was bypassed
    # (or a subclass that overrides it) still targets the original endpoint.
    api_name = "/predict"

    def __init__(self, space: str = None, *, api_name: str = "/predict"):
        """
        Resolve which Space to use and connect to it immediately.

        Args:
            space: Space identifier. When omitted (or empty), the
                ``GRADIO_SPACE`` environment variable is used, falling back
                to the placeholder "your-username/rag-embedder-app".
            api_name: Endpoint of the Space to call for each embedding.
                Defaults to "/predict", the endpoint this class has always
                used, so existing callers are unaffected.
        """
        self.space = space or os.getenv("GRADIO_SPACE", "your-username/rag-embedder-app")
        self.api_name = api_name
        self.client = None
        self._connect()

    def _connect(self):
        """Create the Gradio client for ``self.space`` and store it on ``self.client``."""
        print(f"Connecting to Gradio Space: {self.space}")
        self.client = Client(self.space)
        print("Embedder ready!")

    def embed(self, text: str) -> np.ndarray:
        """
        Embed a single text string.

        Args:
            text: The text sent to the Space endpoint.

        Returns:
            The endpoint's result converted to a float32 NumPy array.
            NOTE(review): the shape is whatever the Space returns —
            presumably a flat vector; confirm against the Space's API.
        """
        result = self.client.predict(text, api_name=self.api_name)
        return np.array(result, dtype=np.float32)

    def embed_batch(self, texts: list) -> np.ndarray:
        """
        Embed a list of texts.

        Each text is embedded with its own ``embed`` call, in order; no
        batching happens on the remote side.

        Args:
            texts: The texts to embed.

        Returns:
            A float32 array built from the per-text results, one entry per
            input text (an empty array for an empty list).
        """
        print(f"Embedding {len(texts)} documents...")
        vectors = [self.embed(t) for t in texts]
        print("Embedding complete!")
        return np.array(vectors, dtype=np.float32)


# ── Quick test ────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke test: connect to the configured Space and embed one short
    # string, then show the shape of the result and its leading values.
    demo_embedder = HFEmbedder()
    sample_vector = demo_embedder.embed("Hello world")
    sample_shape = sample_vector.shape
    leading_values = sample_vector[:5]
    print(f"Vector shape  : {sample_shape}")
    print(f"First 5 values: {leading_values}")