{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7caa8ad",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4, 320)\n",
      "tensor([[ 1.0000, 0.1580, -0.4305, -0.5529],\n",
      " [ 0.1580, 1.0000, 0.6916, 0.6522],\n",
      " [-0.4305, 0.6916, 1.0000, 0.9836],\n",
      " [-0.5529, 0.6522, 0.9836, 1.0000]])\n"
     ]
    }
   ],
   "source": [
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "# Load the pretrained model from the 🤗 Hub\n",
    "model = SentenceTransformer(\"gbyuvd/miniChembed-prototype\")\n",
    "\n",
    "# Molecules to embed, written as SMILES strings\n",
    "sentences = [\n",
    "    \"O=C1/C=C\\\\C=C2/N1C[C@@H]3CNC[C@H]2C3\",  # Cytisine\n",
    "    \"n1c2cc3c(cc2ncc1)[C@@H]4CNC[C@H]3C4\",  # Varenicline\n",
    "    \"c1ncccc1[C@@H]2CCCN2C\",  # Nicotine\n",
    "    \"Nc1nc2cncc-2co1\",  # CID: 162789184\n",
    "]\n",
    "\n",
    "# Run inference: one embedding row per input string\n",
    "embeddings = model.encode(sentences)\n",
    "print(embeddings.shape)\n",
    "# (4, 320)\n",
    "\n",
    "# Pairwise similarity scores between every pair of embeddings\n",
    "similarities = model.similarity(embeddings, embeddings)\n",
    "print(similarities)\n",
    "# tensor([[ 1.0000,  0.1580, -0.4305, -0.5529],\n",
    "#         [ 0.1580,  1.0000,  0.6916,  0.6522],\n",
    "#         [-0.4305,  0.6916,  1.0000,  0.9836],\n",
    "#         [-0.5529,  0.6522,  0.9836,  1.0000]])\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}