hoshoo21 committed
Commit e0313cc · 1 Parent(s): b93cb3e

removing gguf file

Files changed (8)
  1. .flaskenv +0 -2
  2. .gitignore +3 -0
  3. Custom_RAG +1 -0
  4. Dockerfile +11 -9
  5. app.py +15 -1
  6. persiststorage.db +0 -0
  7. rag_engine.py +89 -36
  8. requirements.txt +0 -0
.flaskenv DELETED
@@ -1,2 +0,0 @@
- FLASK_APP=app.py
- FLASK_ENV=development
.gitignore CHANGED
@@ -23,6 +23,9 @@ parts/
  sdist/
  var/
  wheels/
+ raq/
+ uploads/
+ chroma_temp/
  share/python-wheels/
  *.egg-info/
  .installed.cfg
Custom_RAG ADDED
@@ -0,0 +1 @@
+ Subproject commit b93cb3e02cd451d140ce30f823dc68db13d584c4
Dockerfile CHANGED
@@ -1,14 +1,16 @@
- FROM python:3.11
-
- RUN useradd -m -u 1000 user
- USER user
- ENV PATH="/home/user/.local/bin:$PATH"
+ FROM python:3.12.7

+ # Step 2: Set working directory in the container
  WORKDIR /app

- COPY --chown=user ./requirements.txt requirements.txt
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
-
- COPY --chown=user . /app
+ # Step 3: Copy your app files into the container
+ COPY . /app
+
+ # Step 4: Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt

+ # Step 5: Expose a port (optional, but useful)
  EXPOSE 7860
- CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
+
+ # Step 6: Command to run your app
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -34,7 +34,9 @@ def upload_pdf():
  @app.route("/stream", methods=["POST"])
  @cross_origin()
  def stream_answer():
+
      question = request.json.get("question", "")
+     print(question)
      if not question.strip():
          return jsonify({"error": "Empty question"}), 400

@@ -57,6 +59,18 @@ def ask():
          return jsonify({"error": str(e)}), 500
      return jsonify({"message": answer})

+ @app.route("/stream_answer", methods=["POST"])
+ @cross_origin()
+ def stream_question():
+     data = request.get_json()
+     question = data.get("question", "")
+     if not question:
+         return jsonify({"error": "No question provided"}), 400
+     def event_stream():
+         for token in rag.stream_answer(question=question):
+             yield token
+     return Response(event_stream(), content_type="text/event-stream")
+
  if __name__ == "__main__":
-     app.run(host="0.0.0.0", port=6000)
+     app.run(host="0.0.0.0", port=7860)
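The new /stream_answer route returns tokens as a chunked text/event-stream body rather than one JSON payload, so callers must read it incrementally. A minimal client sketch, assuming the app is reachable on localhost:7860 (the port exposed in the Dockerfile above) and using the third-party requests package, which is an assumption not shown in this diff:

```python
# Hypothetical client for the /stream_answer endpoint added above.
# Assumes the Flask app is listening on localhost:7860.
import requests

def stream_question(question: str, base_url: str = "http://localhost:7860"):
    # POST the question as JSON, then consume the response body chunk by
    # chunk as the server yields tokens from rag.stream_answer().
    with requests.post(
        f"{base_url}/stream_answer",
        json={"question": question},
        stream=True,
    ) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)

if __name__ == "__main__":
    stream_question("What is this book about?")
```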
persiststorage.db CHANGED
Binary files a/persiststorage.db and b/persiststorage.db differ
 
rag_engine.py CHANGED
@@ -1,17 +1,18 @@
  import os
  import shutil
  import tempfile
+ from threading import Thread
  from langchain_community.document_loaders import PyPDFLoader
  from langchain_text_splitters import RecursiveCharacterTextSplitter
- from langchain_ollama import OllamaEmbeddings
- from langchain_community.vectorstores import Chroma
+ from langchain_community.vectorstores import Chroma
+ from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain.chains import RetrievalQA
- from langchain_community.llms import Ollama
+ from langchain_community.llms import HuggingFacePipeline
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM, pipeline
+
  from book_title_extractor import BookTitleExtractor
  from duplicate_detector import DuplicateDetector
- from langchain_core.callbacks.base import BaseCallbackHandler
- from langchain_community.chat_models import ChatOllama
- class StreamingHanlder(BaseCallbackHandler):
+ class StreamingHanlder():
      def __init__(self):
          self.buffer = []
          self.token_callback = None

@@ -22,33 +23,34 @@ class StreamingHanlder(BaseCallbackHandler):


  class RagEngine:
-     def __init__(self, embed_model="nomic-embed-text", llm_model="qwen:1.8b", temp_dir="chroma_temp"):
-         self.embed_model = embed_model
-         self.llm_model = llm_model
-         self.embedding = OllamaEmbeddings(model=self.embed_model)
-         self.vectorstore = None
-         self.qa_chain = None
-         self.handler = StreamingHanlder()
-         self.llm = ChatOllama(model=self.llm_model, streaming=True, callbacks=[self.handler])
-
+     def _load_vectorstore(self):
+         if os.path.exists(os.path.join(self.persist_dir, "chroma.sqlite3")):
+             self.vectorstore = Chroma(
+                 persist_directory=self.persist_dir,
+                 embedding_function=self.embedding
+             )
+             self.retriever = self.vectorstore.as_retriever()
+
+     def __init__(self, persist_dir="chroma_store", embed_model="nomic-embed-text", llm_model="qwen:1.8b", temp_dir="chroma_temp"):
          self.temp_dir = temp_dir
-
          os.makedirs(self.temp_dir, exist_ok=True)
-         self.title_extractor = BookTitleExtractor(llm=self.llm)
          self.duplicate_detector = DuplicateDetector()
-         if os.path.exists(os.path.join(self.temp_dir, "chroma.sqlite3")):
-             print("🔁 Loading existing Chroma vectorstore...")
-             self.vectorstore = Chroma(
-                 persist_directory=self.temp_dir,
-                 embedding_function=self.embedding
-             )
-             self.qa_chain = RetrievalQA.from_chain_type(
-                 llm=self.llm,
-                 retriever=self.vectorstore.as_retriever(),
-                 return_source_documents=True
-             )
-             print("Vectorstore and QA chain restored.")
-
+         self.title_extractor = BookTitleExtractor()
+         self.embedding = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2"
+         )
+
+         self.vectorstore = None
+         self.retriever = None
+         self.persist_dir = "chroma_temp"
+         self._load_vectorstore()
+
+         self.model_id = "Qwen/Qwen-1_8B-Chat"
+         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
+         self.model = AutoModelForCausalLM.from_pretrained(self.model_id,
+                                                           trust_remote_code=True,
+                                                           device_map="auto",
+                                                           torch_dtype="auto")
+         self.model.eval()
+
      def clear_temp(self):
          shutil.rmtree(self.temp_dir, ignore_errors=True)
          os.makedirs(self.temp_dir, exist_ok=True)

@@ -84,13 +86,64 @@ class RagEngine:
          else:
              self.vectorstore.add_documents(chunks)

-         self.qa_chain = RetrievalQA.from_chain_type(
-             llm=self.llm,
-             retriever=self.vectorstore.as_retriever(),
-             return_source_documents=True
-         )
-
+         self.vectorstore.persist()
+         self.retriever = self.vectorstore.as_retriever()
+
+     def stream_answer(self, question):
+         if not self.retriever:
+             yield "data: ❗ Please upload and index a PDF first.\n\n"
+             return
+         docs = self.retriever.get_relevant_documents(question)
+         if not docs:
+             yield "data: ❗ No relevant documents found.\n\n"
+             return
+         sources = []
+         for doc in docs:
+             title = doc.metadata.get("source", "Unknown Title")
+             page = doc.metadata.get("page", "Unknown Page")
+             sources.append(f"{title} - Page {page}")
+         context = "\n\n".join([doc.page_content for doc in docs[:3]])
+
+         system_prompt = "You are a helpful assistant that only replies in English."
+         user_prompt = f"Context:\n{context}\n\nQuestion: {question}"
+
+         prompt = (
+             "<|im_start|>system\nYou are a helpful assistant that only replies in English.<|im_end|>\n"
+             f"<|im_start|>user\nContext:\n{context}\n\nQuestion: {question}<|im_end|>\n"
+             "<|im_start|>assistant\n"
+         )
+         print(prompt)
+         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+         print("🔢 Prompt token length:", inputs['input_ids'].shape[-1])
+         streamer = TextIteratorStreamer(
+             tokenizer=self.tokenizer,
+             skip_prompt=True,
+             skip_special_tokens=True
+         )
+         generation_args = {
+             "input_ids": inputs["input_ids"],
+             "attention_mask": inputs["attention_mask"],
+             "max_new_tokens": 512,
+             "streamer": streamer,
+             "do_sample": False,
+             "temperature": 0.0,
+             "top_p": 0.95,
+         }
+         thread = Thread(target=self.model.generate, kwargs=generation_args)
+         thread.start()
+         collected_tokens = []
+         for token in streamer:
+             if token.strip():  # filter out pure-whitespace tokens
+                 collected_tokens.append(token)
+             yield f"{token} "
+         if sources:
+             sources_text = "\n\n📚 **Sources:**\n" + "\n".join(set(sources))
+             for line in sources_text.splitlines():
+                 if line.strip():
+                     yield f"{line} \n"
+
+         yield "\n\n"
+
      def ask_question(self, question):
          print(question)
          if not self.qa_chain:
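For readers unfamiliar with the pattern stream_answer() is built on: TextIteratorStreamer turns a blocking model.generate() call into an iterator by running generation on a worker thread. A stripped-down, self-contained sketch of just that mechanism, using the same model as the commit; the prompt and generation settings are illustrative:

```python
# Minimal sketch of the TextIteratorStreamer + Thread pattern used in
# stream_answer() above. Prompt and max_new_tokens are illustrative.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Qwen/Qwen-1_8B-Chat"  # as in the commit
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto"
)
model.eval()

inputs = tokenizer("What is retrieval-augmented generation?", return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until it finishes, so it runs on a background thread
# while the main thread drains tokens from the streamer as they appear.
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "max_new_tokens": 64, "streamer": streamer},
)
thread.start()
for token in streamer:
    print(token, end="", flush=True)
thread.join()
```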
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
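Taken together, the commit moves the engine off Ollama and onto local HuggingFace components: sentence-transformers embeddings feeding a Chroma store persisted under chroma_temp/ (now gitignored above). A minimal sketch of that load-or-create flow, mirroring _load_vectorstore(); the sample text and query are stand-ins:

```python
# Sketch of the persisted-Chroma flow this commit migrates to. The
# persist_dir matches the value hardcoded in RagEngine.__init__; the
# indexed text is a stand-in for real PDF chunks.
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

persist_dir = "chroma_temp"
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

if os.path.exists(os.path.join(persist_dir, "chroma.sqlite3")):
    # Reopen an existing store, as _load_vectorstore() does on startup.
    vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embedding)
else:
    # First run: index some text and flush it to disk, as the indexing
    # path above does after add_documents().
    vectorstore = Chroma.from_texts(
        ["a stand-in chunk of PDF text"], embedding, persist_directory=persist_dir
    )
    vectorstore.persist()

retriever = vectorstore.as_retriever()
print(retriever.get_relevant_documents("stand-in query"))
```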