Spaces:
Sleeping
Sleeping
File size: 1,965 Bytes
ee8c79f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import json
import os

import pandas as pd
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
# file_path = "thyroidDF.csv"
# df = pd.read_csv(file_path)
def create_doucment(df):
    """Turn every row of *df* into a langchain Document.

    Args:
        df: pandas DataFrame whose rows should be indexed.

    Returns:
        list[Document]: one Document per row. The row's values are
        JSON-serialized into ``page_content`` and the row's index is
        kept as the string ``id`` in metadata.
    """
    docs = []
    for idx, row in df.iterrows():
        # JSON-encode the row dict so the record survives as plain text.
        payload = json.dumps(row.to_dict())
        docs.append(Document(metadata={"id": str(idx)}, page_content=payload))
    return docs
def load_models_embedding():
    """Build the sentence-transformer embedding model for the vector store."""
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)
# SECURITY(review): a real-looking Hugging Face access token is hardcoded and
# committed to source — revoke it and supply it via an environment variable
# (e.g. HUGGINGFACEHUB_API_TOKEN) instead. The trailing "22" appears to be
# deliberate obfuscation that is stripped later with api[:-2].
api="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22"
def load_models_llm():
    """Create the HuggingFaceHub LLM used to answer questions.

    Returns:
        HuggingFaceHub: a Qwen2.5-72B-Instruct endpoint with moderate
        temperature and a short max length for faster inference.

    The API token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable when set; otherwise it falls back to the module-level ``api``
    constant (with its two trailing obfuscation characters stripped) so
    existing deployments keep working.
    """
    # Prefer the environment variable over the committed token; hardcoded
    # secrets in source should be revoked and rotated.
    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or api[:-2]
    llm = HuggingFaceHub(
        repo_id="Qwen/Qwen2.5-72B-Instruct",
        huggingfacehub_api_token=token,
        model_kwargs={"temperature": 0.5, "max_length": 100},
    )
    return llm
def create_database(embedding, documents):
    """Index *documents* into an in-memory Chroma vector store.

    Args:
        embedding: embedding function used to vectorize the documents.
        documents: iterable of langchain Documents to index.

    Returns:
        Chroma: the populated vector store.
    """
    return Chroma.from_documents(documents, embedding=embedding)
# retriever = create_database().as_retriever()
def ask_me(question, retriever, llm):
    """Answer *question* with a retrieval-augmented QA chain.

    Args:
        question: natural-language query string.
        retriever: vector-store retriever that supplies context documents.
        llm: language model used to generate the answer.

    Returns:
        str: the generated answer (also printed to stdout).
    """
    # BUG FIX: the original ignored the `llm` argument and rebuilt a fresh
    # model via load_models_llm() on every call — wasteful and surprising.
    qa_chain = RetrievalQA.from_chain_type(
        retriever=retriever,
        chain_type="stuff",
        llm=llm,
        return_source_documents=True,
    )
    response = qa_chain.invoke({"query": question})
    print("Answer:", response["result"])
    return response["result"]
# qa_chain = RetrievalQA.from_chain_type(
# retriever=retriever,
# chain_type="stuff",
# llm=llm,
# return_source_documents=True
# )
# question = "Can you provide the TSH, T3, and FTI values for patients aged 55?"
# # question = "What columns are in the dataset?"
# response = qa_chain.invoke({"query": question})
# print("Answer:", response["result"])
|