import os
import json

import pandas as pd
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import FAISS

# Only needed by the alternative Chroma-based create_database variants
# kept (commented out) at the bottom of this file:
# from langchain_community.vectorstores import Chroma
# import chromadb
# from chromadb.config import Settings

# file_path = "thyroidDF.csv"
# df = pd.read_csv(file_path)

def create_document(df):
    """Wrap each DataFrame row in a Document, serializing the row dict to a
    JSON string so the retriever can match against any column value."""
    documents = [
        Document(
            metadata={"id": str(i)},
            # default=str handles numpy scalars (e.g. int64) that
            # json.dumps cannot serialize natively
            page_content=json.dumps(row.to_dict(), default=str)
        )
        for i, row in df.iterrows()
    ]
    return documents


def load_models_embedding():
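    # all-MiniLM-L6-v2: a small, fast sentence-embedding model;
    # a reasonable CPU-friendly default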
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

api="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22"
def load_models_llm():
    llm = HuggingFaceHub(
        repo_id="Qwen/Qwen2.5-72B-Instruct",
        # Replace with your token
        huggingfacehub_api_token=api[:-2],
        model_kwargs={"temperature": 0.5,
                      "max_length": 100}  # Faster inference
    )
    return llm
    
def create_database(embedding, documents):
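    # Build an in-memory FAISS index over the embedded documents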
    vectorstore = FAISS.from_documents(documents, embedding)
    return vectorstore
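
# Optional sketch: persist the FAISS index so embeddings aren't recomputed on
# every run. "faiss_index" is an arbitrary local path (an assumption, not part
# of the original script), and recent langchain_community versions require the
# allow_dangerous_deserialization flag when reloading a pickled index.
# vectorstore = create_database(embedding, documents)
# vectorstore.save_local("faiss_index")
# vectorstore = FAISS.load_local("faiss_index", embedding,
#                                allow_dangerous_deserialization=True)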



def ask_me(question, retriever, llm):
    # "stuff" chain: every retrieved document is stuffed into a single prompt
    qa_chain = RetrievalQA.from_chain_type(
        retriever=retriever,
        chain_type="stuff",
        llm=llm,  # reuse the LLM passed in instead of reloading it on every call
        return_source_documents=True)

    response = qa_chain.invoke({"query": question})
    print("Answer:", response["result"])

# Alternative Chroma-based vector stores tried before settling on FAISS
# (these need the commented-out Chroma imports at the top of the file):

# def create_database(embedding, documents):
#     # In-process Chroma client; no server needed
#     client = chromadb.Client()
#     vector_store = Chroma.from_documents(documents, embedding=embedding, client=client)
#     return vector_store

# def create_database(embedding, documents):
#     # Default ephemeral Chroma store (in-memory, nothing persisted to disk)
#     vector_store = Chroma.from_documents(documents, embedding=embedding)
#     return vector_store

# def create_database(embedding, documents):
#     # Explicit in-memory configuration via the duckdb+parquet backend
#     settings = Settings(chroma_db_impl="duckdb+parquet")
#     vector_store = Chroma.from_documents(documents, embedding=embedding, client_settings=settings)
#     return vector_store
# Example end-to-end run. Assumes thyroidDF.csv is in the working directory
# and HUGGINGFACEHUB_API_TOKEN is set in the environment.
if __name__ == "__main__":
    df = pd.read_csv("thyroidDF.csv")
    documents = create_document(df)
    embedding = load_models_embedding()
    retriever = create_database(embedding, documents).as_retriever()
    llm = load_models_llm()

    question = "Can you provide the TSH, T3, and FTI values for patients aged 55?"
    # question = "What columns are in the dataset?"
    ask_me(question, retriever, llm)