Spaces:
Runtime error
Runtime error
File size: 1,339 Bytes
e4d11a3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
from datasets import load_dataset
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
import os
def buscar_contexto(pergunta, k=5):
    """Return the text of the stored documents most similar to ``pergunta``.

    Runs a similarity search against the module-level ``vector_store``
    asking for ``k`` results, then joins the ``page_content`` of every
    hit with a newline between entries.

    Args:
        pergunta: Query passed to the similarity search.
        k: Number of results requested from the search (default 5).

    Returns:
        A single string with the contents of all hits, newline-separated.
    """
    matches = vector_store.similarity_search(pergunta, k=k)
    contents = (match.page_content for match in matches)
    return "\n".join(contents)
def _row_to_document(row):
    """Build the ``Document`` that is indexed for one dataset row.

    The page content bundles the question, all context passages (joined
    with newlines) and the long answer. The metadata carries the row's
    ``pubid``, ``final_decision``, ``meshes`` and ``labels``.

    Chroma only accepts scalar metadata values (str/int/float/bool), so the
    ``meshes`` and ``labels`` lists are flattened into "; "-separated
    strings; passing the raw lists makes the insert fail.
    """
    context = row["context"]
    question = row["question"]
    full_context = "\n".join(context["contexts"])
    answer = row["long_answer"]
    return Document(
        page_content=f"Pergunta: {question}\nContexto: {full_context}\nResposta: {answer}",
        metadata={
            "pubid": row["pubid"],
            "final_decision": row["final_decision"],
            # "; " rather than "," because the terms themselves may
            # contain commas.
            "meshes": "; ".join(str(mesh) for mesh in context["meshes"]),
            "labels": "; ".join(str(label) for label in context["labels"]),
        },
    )


embeddings = OllamaEmbeddings(model="mxbai-embed-large")
dataset = load_dataset("qiaojin/PubMedQA", "pqa_labeled")
db_location = "./chroma_db"

# Only embed and insert the dataset when no persisted store exists yet.
# NOTE(review): an existing but empty/partial directory (e.g. left behind by
# an interrupted first run) is treated as a complete index — confirm this is
# acceptable or delete the directory to force a rebuild.
add_documents = not os.path.exists(db_location)

documents = []
if add_documents:
    documents = [_row_to_document(row) for row in dataset["train"]]
    vector_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=db_location,
    )
else:
    # Re-open the previously persisted collection without re-embedding.
    vector_store = Chroma(
        embedding_function=embeddings,
        persist_directory=db_location,
    )