Zubaish committed on
Commit
e598db4
·
1 Parent(s): 3f76bb4

Fix dependency conflict; switch to transformers LLM

Browse files
Files changed (2) hide show
  1. rag.py +37 -11
  2. requirements.txt +2 -1
rag.py CHANGED
@@ -1,7 +1,10 @@
1
  from langchain_community.vectorstores import Chroma
2
- from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
3
  from langchain.schema import SystemMessage, HumanMessage
4
 
 
 
 
5
  from ingest import load_and_split_docs
6
 
7
  print("⏳ Loading documents...")
@@ -21,13 +24,28 @@ if documents:
21
  else:
22
  retriever = None
23
 
24
- llm = ChatHuggingFace(
25
- repo_id="microsoft/Phi-4-mini-instruct",
26
- temperature=0.2
 
 
 
 
27
  )
28
 
29
  print("✅ RAG initialized.")
30
 
 
 
 
 
 
 
 
 
 
 
 
31
  def ask_rag_with_status(question: str):
32
  if not retriever:
33
  return {
@@ -38,14 +56,22 @@ def ask_rag_with_status(question: str):
38
  docs = retriever.get_relevant_documents(question)
39
  context = "\n\n".join(d.page_content for d in docs)
40
 
41
- messages = [
42
- SystemMessage(content="Answer using only the provided context."),
43
- HumanMessage(content=f"Context:\n{context}\n\nQuestion: {question}")
44
- ]
 
 
 
 
 
 
 
 
45
 
46
- response = llm.invoke(messages)
47
 
48
  return {
49
- "status": ["🔍 Retrieved documents", "🧠 Generating answer"],
50
- "answer": response.content
51
  }
 
1
  from langchain_community.vectorstores import Chroma
2
+ from langchain_community.embeddings import HuggingFaceEmbeddings
3
  from langchain.schema import SystemMessage, HumanMessage
4
 
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
6
+ import torch
7
+
8
  from ingest import load_and_split_docs
9
 
10
  print("⏳ Loading documents...")
 
24
  else:
25
  retriever = None
26
 
27
+ print("⏳ Loading LLM...")
28
+
29
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
30
+ model = AutoModelForCausalLM.from_pretrained(
31
+ "microsoft/Phi-3-mini-4k-instruct",
32
+ torch_dtype=torch.float32,
33
+ device_map="cpu"
34
  )
35
 
36
  print("✅ RAG initialized.")
37
 
38
def generate(prompt: str) -> str:
    """Generate a completion for ``prompt`` with the module-level Phi-3 model.

    Args:
        prompt: Full prompt text (context + question) to feed the model.

    Returns:
        Only the newly generated text. ``model.generate`` returns the
        concatenation of the input tokens and the completion, so the
        prompt-length prefix is sliced off before decoding — otherwise the
        caller's "answer" would start with the entire echoed prompt.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.2,
            do_sample=True,
        )
    # Drop the echoed prompt tokens; decode only the new completion.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
48
+
49
  def ask_rag_with_status(question: str):
50
  if not retriever:
51
  return {
 
56
  docs = retriever.get_relevant_documents(question)
57
  context = "\n\n".join(d.page_content for d in docs)
58
 
59
+ prompt = f"""
60
+ You are a helpful assistant.
61
+ Answer ONLY using the context below.
62
+
63
+ Context:
64
+ {context}
65
+
66
+ Question:
67
+ {question}
68
+
69
+ Answer:
70
+ """
71
 
72
+ answer = generate(prompt)
73
 
74
  return {
75
+ "status": ["🔍 Retrieved documents", "🧠 Generated answer"],
76
+ "answer": answer
77
  }
requirements.txt CHANGED
@@ -5,10 +5,11 @@ python-dotenv
5
 
6
  langchain==0.2.17
7
  langchain-community==0.2.17
8
- langchain-huggingface==0.1.0
9
 
10
  chromadb==0.5.5
11
  sentence-transformers
 
 
12
 
13
  pypdf
14
  huggingface_hub>=0.33.4,<1.0.0
 
5
 
6
  langchain==0.2.17
7
  langchain-community==0.2.17
 
8
 
9
  chromadb==0.5.5
10
  sentence-transformers
11
+ transformers==4.39.3
12
+ torch
13
 
14
  pypdf
15
  huggingface_hub>=0.33.4,<1.0.0