Zubaish committed on
Commit
9797354
·
1 Parent(s): 22fa804

Fix: resolve LangChain dependency conflict

Browse files
Files changed (2) hide show
  1. rag.py +87 -38
  2. requirements.txt +0 -1
rag.py CHANGED
@@ -1,52 +1,104 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
- from langchain_huggingface import HuggingFaceEmbeddings
3
- from langchain_chroma import Chroma
4
- from langchain_text_splitters import RecursiveCharacterTextSplitter
5
- from langchain_community.document_loaders import PyPDFLoader
6
  import os
7
 
8
- MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 
 
 
 
 
9
 
10
- print("⏳ Loading embeddings...")
 
 
 
 
 
 
 
 
 
 
 
 
11
  embeddings = HuggingFaceEmbeddings(
12
- model_name="sentence-transformers/all-MiniLM-L6-v2"
13
  )
14
 
15
- print("⏳ Loading documents...")
16
- docs = []
17
- if os.path.exists("kb_docs"):
18
- for f in os.listdir("kb_docs"):
19
- if f.endswith(".pdf"):
20
- loader = PyPDFLoader(os.path.join("kb_docs", f))
 
 
 
 
 
 
 
21
  docs.extend(loader.load())
22
 
23
- splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
24
- splits = splitter.split_documents(docs)
 
 
 
25
 
26
- vectorstore = Chroma.from_documents(
27
- splits,
28
- embedding=embeddings,
29
- persist_directory="./chroma_db"
30
- )
 
 
 
 
 
 
 
 
31
 
 
 
 
32
  print("⏳ Loading LLM...")
 
33
  tokenizer = AutoTokenizer.from_pretrained(
34
- MODEL_ID,
35
  trust_remote_code=True
36
  )
37
 
38
  model = AutoModelForCausalLM.from_pretrained(
39
- MODEL_ID,
40
- trust_remote_code=True
41
- ) # 👈 NO device_map, NO low_cpu_mem_usage
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def ask_rag_with_status(question: str):
44
- retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
 
 
45
  docs = retriever.get_relevant_documents(question)
46
 
47
- context = "\n\n".join(d.page_content for d in docs)
48
 
49
- prompt = f"""Use the context below to answer the question.
 
 
 
50
 
51
  Context:
52
  {context}
@@ -54,18 +106,15 @@ Context:
54
  Question:
55
  {question}
56
 
57
- Answer:"""
 
58
 
59
- inputs = tokenizer(prompt, return_tensors="pt")
60
- outputs = model.generate(
61
- **inputs,
62
- max_new_tokens=256,
63
- do_sample=True,
64
- temperature=0.7
65
- )
66
 
67
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
68
  return {
69
  "answer": answer,
70
- "status": ["✅ Answer generated"]
71
  }
 
 
 
 
 
 
1
  import os
2
 
3
+ from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import Chroma
7
+
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
 
10
+ from config import (
11
+ KB_DIR,
12
+ PERSIST_DIR,
13
+ EMBEDDING_MODEL,
14
+ LLM_MODEL,
15
+ CHUNK_SIZE,
16
+ CHUNK_OVERLAP,
17
+ TOP_K,
18
+ )
19
+
20
+ # -----------------------------
21
+ # Load embeddings
22
+ # -----------------------------
23
  embeddings = HuggingFaceEmbeddings(
24
+ model_name=EMBEDDING_MODEL
25
  )
26
 
27
+ # -----------------------------
28
+ # Load or build vector DB
29
+ # -----------------------------
30
+ if not os.path.exists(PERSIST_DIR):
31
+ os.makedirs(PERSIST_DIR, exist_ok=True)
32
+
33
+ if not os.listdir(PERSIST_DIR):
34
+ print("⏳ Loading documents...")
35
+
36
+ docs = []
37
+ for filename in os.listdir(KB_DIR):
38
+ if filename.lower().endswith(".pdf"):
39
+ loader = PyPDFLoader(os.path.join(KB_DIR, filename))
40
  docs.extend(loader.load())
41
 
42
+ splitter = RecursiveCharacterTextSplitter(
43
+ chunk_size=CHUNK_SIZE,
44
+ chunk_overlap=CHUNK_OVERLAP
45
+ )
46
+ splits = splitter.split_documents(docs)
47
 
48
+ vectorstore = Chroma.from_documents(
49
+ documents=splits,
50
+ embedding=embeddings,
51
+ persist_directory=PERSIST_DIR
52
+ )
53
+ vectorstore.persist()
54
+ else:
55
+ vectorstore = Chroma(
56
+ persist_directory=PERSIST_DIR,
57
+ embedding_function=embeddings
58
+ )
59
+
60
+ retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
61
 
62
+ # -----------------------------
63
+ # Load LLM (NON-INTERACTIVE)
64
+ # -----------------------------
65
  print("⏳ Loading LLM...")
66
+
67
  tokenizer = AutoTokenizer.from_pretrained(
68
+ LLM_MODEL,
69
  trust_remote_code=True
70
  )
71
 
72
  model = AutoModelForCausalLM.from_pretrained(
73
+ LLM_MODEL,
74
+ trust_remote_code=True,
75
+ low_cpu_mem_usage=False
76
+ )
77
 
78
+ generator = pipeline(
79
+ "text-generation",
80
+ model=model,
81
+ tokenizer=tokenizer,
82
+ max_new_tokens=512,
83
+ do_sample=True,
84
+ temperature=0.3,
85
+ )
86
+
87
+ # -----------------------------
88
+ # RAG Query Function
89
+ # -----------------------------
90
  def ask_rag_with_status(question: str):
91
+ status = []
92
+
93
+ status.append("🔍 Searching knowledge base...")
94
  docs = retriever.get_relevant_documents(question)
95
 
96
+ context = "\n\n".join(doc.page_content for doc in docs)
97
 
98
+ prompt = f"""
99
+ You are a helpful assistant.
100
+ Answer the question using ONLY the context below.
101
+ If the answer is not in the context, say you don't know.
102
 
103
  Context:
104
  {context}
 
106
  Question:
107
  {question}
108
 
109
+ Answer:
110
+ """
111
 
112
+ status.append("🧠 Generating answer...")
113
+ output = generator(prompt)[0]["generated_text"]
114
+
115
+ answer = output.split("Answer:")[-1].strip()
 
 
 
116
 
 
117
  return {
118
  "answer": answer,
119
+ "status": status
120
  }
requirements.txt CHANGED
@@ -4,7 +4,6 @@ python-dotenv
4
 
5
  langchain==0.2.17
6
  langchain-community==0.2.17
7
- langchain-huggingface==0.1.0
8
  langchain-text-splitters==0.2.4
9
 
10
  chromadb==0.5.5
 
4
 
5
  langchain==0.2.17
6
  langchain-community==0.2.17
 
7
  langchain-text-splitters==0.2.4
8
 
9
  chromadb==0.5.5