# chromadb / app.py
# naughtondale's picture
# Update app.py
# 71a030d
# raw
# history blame contribute delete
# 950 Bytes
import asyncio
import re

from chroma import ChromaClient, TransformersEmbeddingFunction
def _split_sentences(text):
    """Split *text* into sentences with a simple punctuation heuristic.

    A sentence boundary is any run of whitespace that follows ``.``, ``!``
    or ``?``. This is a dependency-free stand-in for a real sentence
    tokenizer, so abbreviations such as "e.g." will be split too.

    Args:
        text: The raw document text.

    Returns:
        A list of non-empty sentence strings, in document order.
    """
    pieces = re.split(r"(?<=[.!?])\s+", text.strip())
    # An empty or whitespace-only document yields [""]; drop empty pieces.
    return [piece for piece in pieces if piece]


async def main(doc_path='Caribbean-Artificial-Intelligence-Policy-Roadmap.txt'):
    """Index the sentences of a text document into a Chroma collection.

    Reads the document, splits it into sentences, embeds each sentence with
    a BERT embedding function and adds ids, embeddings and sentences to the
    ``my_collection`` collection.

    Args:
        doc_path: Path of the UTF-8 text file to index. Defaults to the
            roadmap document the script was originally written for.

    Raises:
        OSError: If the document cannot be opened.
    """
    # Read and split first so a missing or empty document fails before any
    # backend work is done.
    with open(doc_path, encoding='utf-8') as f:
        doc = f.read()

    sentences = _split_sentences(doc)
    if not sentences:
        # Avoid sending an empty batch to the collection.
        print("No sentences found; nothing to index.")
        return

    # NOTE(review): ChromaClient / createCollection and the dict-style
    # arguments are used exactly as imported at the top of this file --
    # confirm the installed `chroma` module actually exposes this API.
    client = ChromaClient()
    embedder = TransformersEmbeddingFunction('bert-base-uncased')
    collection = await client.createCollection({
        'name': 'my_collection',
        'embeddingFunction': embedder,
    })

    embeddings = [embedder.encode(sentence) for sentence in sentences]

    # Chroma's documented API uses string ids, so stringify the position.
    await collection.add({
        'ids': [str(index) for index in range(len(sentences))],
        'embeddings': embeddings,
        'documents': sentences,
    })
    print("Documents indexed successfully!")
# Run the indexing job only when executed as a script, so importing this
# module does not trigger file and backend I/O as a side effect.
if __name__ == "__main__":
    asyncio.run(main())