# NOTE: This file was recovered from a web file viewer. The viewer's page header
# ("Spaces: Sleeping", "File size: 1,333 Bytes") and its commit-hash / line-number
# gutter are not part of the program and are kept here only as this comment.
import os
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
from dotenv import load_dotenv
# Read variables from a .env file (python-dotenv) before os.environ is consulted.
load_dotenv()

# Shared Pinecone client. A missing PINECONE_API_KEY raises KeyError at import time.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create the index only when it is not already listed for this account.
_known_index_names = pc.list_indexes().names()
if 'your-index' not in _known_index_names:
    _index_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name='your-index',
        # Must equal the length of the vectors stored in the index; the
        # functions in this file embed with text-embedding-ada-002
        # (1536-dimensional output per OpenAI's documentation).
        dimension=1536,
        metric='cosine',
        spec=_index_spec,
    )

# Shared OpenAI client. A missing OPENAI_API_KEY raises KeyError at import time.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def upsert_texts(texts, ids, batch_size=32):
    """Embed ``texts`` with OpenAI and upsert them into the 'your-index' index.

    Each text becomes one vector ``(id, embedding, {"text": text})``. Work is
    done in batches: every batch is embedded with a single embeddings request
    and then written with a single upsert request, using the module-level
    ``client`` (OpenAI) and ``pc`` (Pinecone) objects.

    Args:
        texts: Iterable of strings to embed.
        ids: Iterable of vector ids, one per text, in the same order.
        batch_size: Maximum number of texts per embedding request and per
            upsert request. Must be at least 1.

    Returns:
        None.

    Raises:
        ValueError: If ``texts`` and ``ids`` differ in length, or if
            ``batch_size`` is smaller than 1.
    """
    # Materialise once so generators are supported and lengths can be compared.
    texts = list(texts)
    ids = list(ids)

    if len(texts) != len(ids):
        # zip() would silently drop the surplus items and lose data.
        raise ValueError(
            f"texts and ids must have the same length "
            f"(got {len(texts)} texts and {len(ids)} ids)"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 (got {batch_size})")
    if not texts:
        # Nothing to embed or store; avoid sending an empty upsert request.
        return

    index = pc.Index("your-index")

    # NOTE(review): both services cap request size; the default batch size is
    # deliberately small -- confirm against current OpenAI / Pinecone limits.
    for start in range(0, len(texts), batch_size):
        batch_texts = texts[start:start + batch_size]
        batch_ids = ids[start:start + batch_size]

        response = client.embeddings.create(
            model="text-embedding-ada-002",
            input=batch_texts,
        )
        # Each returned item carries the position of its input; sort on it so
        # embeddings are paired with the right id regardless of return order.
        ordered = sorted(response.data, key=lambda item: item.index)

        vectors = [
            (vector_id, item.embedding, {"text": text})
            for vector_id, item, text in zip(batch_ids, ordered, batch_texts)
        ]
        index.upsert(vectors)
def query_text(query, top_k=5):
    """Return the closest matches in the 'your-index' index for ``query``.

    The query string is embedded with text-embedding-ada-002 via the
    module-level OpenAI ``client``, and the resulting vector is searched in
    Pinecone through the module-level ``pc`` client.

    Args:
        query: Text to search for.
        top_k: Number of matches to request from the index.

    Returns:
        The response object of ``index.query``; metadata is requested with
        ``include_metadata=True``.
    """
    # Reuse the shared Pinecone client instead of building one per query.
    index = pc.Index('your-index')

    response = client.embeddings.create(
        model="text-embedding-ada-002",
        input=query,
    )
    # A single input string yields a single embedding at position 0.
    query_vector = response.data[0].embedding

    return index.query(vector=query_vector, top_k=top_k, include_metadata=True)