Spaces:

mgbam
/

MCP_Res

Runtime error

MCP_Res / mcp / embeddings.py

Update mcp/embeddings.py

8f75377 verified 9 months ago

1.07 kB

	# ── mcp/embeddings.py ───────────────────────────────────────────────────
	import os, asyncio
	from huggingface_hub import InferenceClient
	from sklearn.cluster import KMeans

	# Hugging Face API token, read from the HF_TOKEN environment variable
	# (per the original note, intended for pro endpoints). os.getenv returns
	# None when the variable is not set.
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Model identifier sent with every embedding request made by embed_texts.
	EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
	# Single Inference API client shared by the module, created at import time.
	client = InferenceClient(token=HF_TOKEN)

	async def embed_texts(texts: list[str]) -> list[list[float]]:
	    """
	    Compute embeddings for a list of texts via the HF Inference API.

	    Each text is sent as its own ``feature_extraction`` request for
	    ``EMBED_MODEL``. The client call is blocking, so every request runs in a
	    worker thread and all of them are awaited together.

	    Args:
	        texts: Strings to embed.

	    Returns:
	        One embedding per input text, in input order. An empty input
	        yields an empty list without contacting the API.

	    Raises:
	        Any exception raised by the Inference client for a failed request
	        propagates to the caller.
	    """
	    def _embed(text: str) -> list[float]:
	        # InferenceClient exposes embeddings through feature_extraction();
	        # it has no embed() method.
	        vector = client.feature_extraction(text, model=EMBED_MODEL)
	        # The client returns a NumPy array; convert it so the result matches
	        # the annotated plain-list type.
	        # NOTE(review): assumes the model returns one pooled vector per
	        # text -- confirm against the deployed endpoint.
	        to_list = getattr(vector, "tolist", None)
	        return to_list() if callable(to_list) else list(vector)

	    if not texts:
	        return []

	    # Fan the blocking HTTP calls out to the default thread pool.
	    pending = [asyncio.to_thread(_embed, text) for text in texts]
	    return list(await asyncio.gather(*pending))

	async def cluster_embeddings(embs: list[list[float]], n_clusters: int = 5) -> list[int]:
	    """
	    Cluster embeddings with k-means and return one label per embedding.

	    Args:
	        embs: Embedding vectors to cluster, one per item.
	        n_clusters: Requested number of clusters. It is capped at
	            ``len(embs)`` because k-means cannot form more clusters than
	            there are samples.

	    Returns:
	        Cluster labels as plain ints, aligned with ``embs``. An empty input
	        yields an empty list.

	    Raises:
	        ValueError: Propagated from scikit-learn for invalid parameters
	            (for example a non-positive ``n_clusters``) or malformed data.
	    """
	    # len() rather than truthiness so array-like inputs are handled too.
	    if len(embs) == 0:
	        return []

	    # Asking for more clusters than samples makes KMeans raise; cap instead.
	    effective_clusters = min(n_clusters, len(embs))

	    def _fit() -> list[int]:
	        # Fixed random_state keeps the labelling reproducible between runs.
	        kmeans = KMeans(n_clusters=effective_clusters, random_state=0)
	        return kmeans.fit_predict(embs).tolist()

	    # The fit is blocking work; run it in a worker thread so the event loop
	    # stays responsive, as embed_texts does for its API calls.
	    return await asyncio.to_thread(_fit)