1MR committed on
Commit
ee8c79f
·
verified ·
1 Parent(s): 22ab1ef

Update RAG.py

Browse files
Files changed (1) hide show
  1. RAG.py +71 -72
RAG.py CHANGED
@@ -1,72 +1,71 @@
1
- import pandas as pd
2
- import json
3
- from langchain.docstore.document import Document
4
- from langchain.vectorstores import Chroma
5
- from langchain.embeddings import HuggingFaceEmbeddings
6
- from langchain.llms import HuggingFaceHub
7
- from langchain.chains import RetrievalQA
8
-
9
-
10
- # file_path = "thyroidDF.csv"
11
- # df = pd.read_csv(file_path)
12
-
13
- def create_doucment(df):
14
- documents = [
15
- Document(
16
- metadata={"id": str(i)},
17
- # Serialize the dictionary to a JSON string
18
- page_content=json.dumps(row.to_dict())
19
- )
20
- for i, row in df.iterrows()
21
- ]
22
- return documents
23
-
24
-
25
- def load_models_embedding():
26
- embeddings = HuggingFaceEmbeddings(
27
- model_name="sentence-transformers/all-MiniLM-L6-v2")
28
- return embeddings
29
-
30
-
31
- def load_models_llm():
32
- llm = HuggingFaceHub(
33
- repo_id="Qwen/Qwen2.5-72B-Instruct",
34
- # Replace with your token
35
- api="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22"
36
- huggingfacehub_api_token=api[:-2],
37
- model_kwargs={"temperature": 0.5,
38
- "max_length": 100} # Faster inference
39
- )
40
- return llm
41
-
42
-
43
- def create_database(embedding, documents):
44
- vector_store = Chroma.from_documents(documents, embedding=embedding)
45
- return vector_store
46
-
47
- # retriever = create_database().as_retriever()
48
-
49
-
50
- def ask_me(question, retriever, llm):
51
-
52
- qa_chain = RetrievalQA.from_chain_type(
53
- retriever=retriever,
54
- chain_type="stuff",
55
- llm=load_models_llm(),
56
- return_source_documents=True)
57
-
58
- response = qa_chain.invoke({"query": question})
59
- print("Answer:", response["result"])
60
-
61
-
62
- # qa_chain = RetrievalQA.from_chain_type(
63
- # retriever=retriever,
64
- # chain_type="stuff",
65
- # llm=llm,
66
- # return_source_documents=True
67
- # )
68
-
69
- # question = "Can you provide the TSH, T3, and FTI values for patients aged 55?"
70
- # # question = "What columns are in the dataset?"
71
- # response = qa_chain.invoke({"query": question})
72
- # print("Answer:", response["result"])
 
1
+ import pandas as pd
2
+ import json
3
+ from langchain.docstore.document import Document
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.llms import HuggingFaceHub
7
+ from langchain.chains import RetrievalQA
8
+
9
+
10
+ # file_path = "thyroidDF.csv"
11
+ # df = pd.read_csv(file_path)
12
+
13
+ def create_doucment(df):
14
+ documents = [
15
+ Document(
16
+ metadata={"id": str(i)},
17
+ # Serialize the dictionary to a JSON string
18
+ page_content=json.dumps(row.to_dict())
19
+ )
20
+ for i, row in df.iterrows()
21
+ ]
22
+ return documents
23
+
24
+
25
+ def load_models_embedding():
26
+ embeddings = HuggingFaceEmbeddings(
27
+ model_name="sentence-transformers/all-MiniLM-L6-v2")
28
+ return embeddings
29
+
30
+ api="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22"
31
+ def load_models_llm():
32
+ llm = HuggingFaceHub(
33
+ repo_id="Qwen/Qwen2.5-72B-Instruct",
34
+ # Replace with your token
35
+ huggingfacehub_api_token=api[:-2],
36
+ model_kwargs={"temperature": 0.5,
37
+ "max_length": 100} # Faster inference
38
+ )
39
+ return llm
40
+
41
+
42
+ def create_database(embedding, documents):
43
+ vector_store = Chroma.from_documents(documents, embedding=embedding)
44
+ return vector_store
45
+
46
+ # retriever = create_database().as_retriever()
47
+
48
+
49
+ def ask_me(question, retriever, llm):
50
+
51
+ qa_chain = RetrievalQA.from_chain_type(
52
+ retriever=retriever,
53
+ chain_type="stuff",
54
+ llm=load_models_llm(),
55
+ return_source_documents=True)
56
+
57
+ response = qa_chain.invoke({"query": question})
58
+ print("Answer:", response["result"])
59
+
60
+
61
+ # qa_chain = RetrievalQA.from_chain_type(
62
+ # retriever=retriever,
63
+ # chain_type="stuff",
64
+ # llm=llm,
65
+ # return_source_documents=True
66
+ # )
67
+
68
+ # question = "Can you provide the TSH, T3, and FTI values for patients aged 55?"
69
+ # # question = "What columns are in the dataset?"
70
+ # response = qa_chain.invoke({"query": question})
71
+ # print("Answer:", response["result"])