# NOTE: this file was scraped from a HuggingFace Spaces page; the original
# header read "Spaces: Sleeping" (Space status banner, not part of the code).
import json
import os

import chromadb
import pandas as pd
from chromadb.config import Settings
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import FAISS
| # file_path = "thyroidDF.csv" | |
| # df = pd.read_csv(file_path) | |
def create_doucment(df):
    """Convert each row of *df* into a LangChain ``Document``.

    The row is serialized to a JSON string for the document body so the
    full record is searchable text; the row's index is kept (as a string)
    under the ``"id"`` metadata key.

    Note: the name keeps the original "doucment" typo because callers
    elsewhere may depend on it.
    """
    docs = []
    for idx, row in df.iterrows():
        # One document per CSV row, body = JSON dump of the row's fields.
        docs.append(
            Document(
                metadata={"id": str(idx)},
                page_content=json.dumps(row.to_dict()),
            )
        )
    return docs
def load_models_embedding():
    """Return the sentence-transformer embedding model used for indexing."""
    # all-MiniLM-L6-v2: small general-purpose sentence embedder.
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
| api="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22" | |
def load_models_llm():
    """Create the HuggingFace Hub LLM used to answer questions.

    The API token is taken from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable when set; otherwise it falls back to the module-level ``api``
    constant with its two trailing obfuscation characters stripped, which
    preserves the original behavior.

    Returns:
        A configured ``HuggingFaceHub`` LLM instance.
    """
    # Prefer an env-supplied secret over the hard-coded (leaked) constant.
    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or api[:-2]
    return HuggingFaceHub(
        repo_id="Qwen/Qwen2.5-72B-Instruct",
        huggingfacehub_api_token=token,
        # Moderate temperature, short completions for faster inference.
        model_kwargs={"temperature": 0.5, "max_length": 100},
    )
def create_database(embedding, documents):
    """Build an in-memory FAISS vector store over *documents*.

    Args:
        embedding: embedding model used to vectorize each document.
        documents: iterable of LangChain Documents to index.

    Returns:
        The populated FAISS vector store.
    """
    return FAISS.from_documents(documents, embedding)
def ask_me(question, retriever, llm):
    """Answer *question* with a retrieval-augmented QA chain.

    Args:
        question: natural-language query string.
        retriever: vector-store retriever that supplies context documents.
        llm: language model used to generate the answer.

    Returns:
        The full chain response dict, including ``"result"`` and
        ``"source_documents"``.
    """
    qa_chain = RetrievalQA.from_chain_type(
        retriever=retriever,
        chain_type="stuff",
        # BUG FIX: use the caller-supplied `llm`. The original called
        # load_models_llm() here, silently ignoring this parameter and
        # reloading a model on every question.
        llm=llm,
        return_source_documents=True,
    )
    response = qa_chain.invoke({"query": question})
    print("Answer:", response["result"])
    return response
| # def create_database(embedding, documents): | |
| # # Use a local Chroma client (no server needed) | |
| # client = chromadb.Client() | |
| # # Create vector store from documents | |
| # vector_store = Chroma.from_documents(documents, embedding=embedding, client=client) | |
| # return vector_store | |
| # def create_database(embedding, documents): | |
| # # Use a local directory for Chroma database storage | |
| # vector_store = Chroma.from_documents( | |
| # documents, | |
| # embedding=embedding) | |
| # return vector_store | |
| # def create_database(embedding, documents): | |
| # # Define Chroma settings for an in-memory database | |
| # settings = Settings( | |
| # chroma_db_impl="duckdb+parquet" # Use in-memory configuration | |
| # ) | |
| # # Initialize vector store with the provided documents and embedding | |
| # vector_store = Chroma.from_documents(documents, embedding=embedding, client_settings=settings) | |
| # return vector_store | |
| # retriever = create_database().as_retriever() | |
| # qa_chain = RetrievalQA.from_chain_type( | |
| # retriever=retriever, | |
| # chain_type="stuff", | |
| # llm=llm, | |
| # return_source_documents=True | |
| # ) | |
| # question = "Can you provide the TSH, T3, and FTI values for patients aged 55?" | |
| # # question = "What columns are in the dataset?" | |
| # response = qa_chain.invoke({"query": question}) | |
| # print("Answer:", response["result"]) | |