"""HF Space exposing the devis-matcher model as a sentence-embedding API.

The fine-tuned camembert-large model (~440 MB, repo given by ``MODEL_NAME``
below) is loaded once at Space startup into the container's 16 GB RAM, then
used to encode batches of strings on demand. The ConstructCRM backend
(Render free tier, 512 MB) calls this Space instead of loading the model
locally — which would OOM the worker.

API surface (auto-generated by Gradio):
    POST /api/encode/
    body:    {"data": [["text 1", "text 2"]]}
    returns: {"data": [[[float, ...], [float, ...]]], ...}

For batch efficiency, send as many texts as you can in a single call.
"""

import gradio as gr
from sentence_transformers import SentenceTransformer

# Single source of truth for the model repo; the docstring and the Gradio
# description both defer to this constant so they cannot drift out of sync.
MODEL_NAME = "RavenAgent/devis-matcher"

print(f"[Space] Loading {MODEL_NAME} (cold start may take a minute)…")
model = SentenceTransformer(MODEL_NAME)
print(f"[Space] Loaded. Embedding dim: {model.get_sentence_embedding_dimension()}")


def encode(texts: list[str] | str | None) -> list[list[float]] | list | dict:
    """Encode a list of strings into sentence embeddings.

    Args:
        texts: A list of strings to embed. A bare string is treated as a
            one-element batch; ``None`` yields an empty result.

    Returns:
        A 2D list of floats (one embedding vector per input string),
        ``[]`` when there is nothing to encode, or an ``{"error": ...}``
        dict when the payload is neither a string nor a list.
    """
    if texts is None:
        return []
    if isinstance(texts, str):
        # Accept a bare string as a convenience: treat it as a batch of one.
        texts = [texts]
    if not isinstance(texts, list):
        return {"error": f"expected list[str], got {type(texts).__name__}"}
    # Drop non-string items silently rather than failing the whole batch.
    texts = [t for t in texts if isinstance(t, str)]
    if not texts:
        return []
    # normalize_embeddings=False: callers are expected to normalize (or not)
    # on their side; .tolist() makes the result JSON-serializable.
    embs = model.encode(texts, convert_to_numpy=True, normalize_embeddings=False)
    return embs.tolist()


demo = gr.Interface(
    fn=encode,
    inputs=gr.JSON(label="Texts (JSON array of strings)"),
    outputs=gr.JSON(label="Embeddings (2D float array)"),
    title="Devis Matcher Encoder",
    description=(
        f"Sentence embeddings via **{MODEL_NAME}** "
        "(camembert-large fine-tuned on French construction documents). "
        "POST a list of strings, receive a 2D array of embedding vectors."
    ),
    examples=[
        [["PEINTURE SUR MURS", "Carrelage faïence salle de bain", "Plomberie cuisine"]],
    ],
    api_name="encode",
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()