import json
import os

import pandas as pd
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Chroma

# Example data load (kept for reference):
# df = pd.read_csv("thyroidDF.csv")


def create_document(df):
    """Convert each DataFrame row into a LangChain Document.

    Args:
        df: pandas DataFrame whose rows become documents.

    Returns:
        list[Document]: one Document per row. The row's values are
        serialized to a JSON string as ``page_content``, and the row
        index (stringified) is stored under the ``"id"`` metadata key.
    """
    return [
        Document(
            metadata={"id": str(i)},
            # Serialize the row dict so the page content is a single string.
            page_content=json.dumps(row.to_dict()),
        )
        for i, row in df.iterrows()
    ]


# Backward-compatible alias for the original (misspelled) public name.
create_doucment = create_document


def load_models_embedding():
    """Return a sentence-transformers embedding model for indexing."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


# SECURITY FIX: the original source hard-coded a Hugging Face API token
# here (lightly obfuscated via a trailing slice). Secrets must never be
# committed — read the token from the environment instead, and revoke
# the leaked token.
api = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")


def load_models_llm():
    """Return a HuggingFaceHub-hosted LLM for answering queries."""
    return HuggingFaceHub(
        repo_id="Qwen/Qwen2.5-72B-Instruct",
        # Token now comes from the environment (see `api` above), so no
        # de-obfuscation slicing is needed.
        huggingfacehub_api_token=api,
        model_kwargs={"temperature": 0.5, "max_length": 100},
    )


def create_database(embedding, documents):
    """Build an in-memory Chroma vector store from the documents.

    Args:
        embedding: embedding model used to vectorize the documents.
        documents: iterable of LangChain Documents to index.

    Returns:
        Chroma: the populated vector store.
    """
    return Chroma.from_documents(documents, embedding=embedding)


def ask_me(question, retriever, llm):
    """Answer a question via a retrieval-augmented QA chain.

    Args:
        question: natural-language query string.
        retriever: vector-store retriever supplying context documents.
        llm: language model used to generate the answer.

    Side effects:
        Prints the answer to stdout.
    """
    qa_chain = RetrievalQA.from_chain_type(
        retriever=retriever,
        chain_type="stuff",
        # BUG FIX: the original ignored the `llm` parameter and called
        # load_models_llm() here, re-creating the model on every query.
        llm=llm,
        return_source_documents=True,
    )
    response = qa_chain.invoke({"query": question})
    print("Answer:", response["result"])


# Example usage (kept for reference):
# retriever = create_database(load_models_embedding(), documents).as_retriever()
# ask_me("Can you provide the TSH, T3, and FTI values for patients aged 55?",
#        retriever, load_models_llm())