cmd0160 committed on
Commit
0438c70
·
1 Parent(s): ee749be

Fixing deployment

Browse files
.gitignore CHANGED
@@ -11,7 +11,3 @@ __pycache__/
11
  # /data/
12
  # tests / validation artifacts
13
  # /validation/
14
- EOF
15
-
16
- git add .gitignore
17
- # do NOT commit this yet if you prefer to review
 
11
  # /data/
12
  # tests / validation artifacts
13
  # /validation/
 
 
 
 
src/ingest.py CHANGED
@@ -1,12 +1,10 @@
1
  import argparse
2
  import os
3
 
4
- <<<<<<< HEAD
5
  from langchain_community.document_loaders import DirectoryLoader, TextLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import Chroma
8
  from langchain_community.embeddings import OpenAIEmbeddings
9
- =======
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_community.vectorstores import Chroma
12
  from langchain_community.embeddings import OpenAIEmbeddings
@@ -22,7 +20,6 @@ try:
22
  _HAS_KG = True
23
  except Exception:
24
  _HAS_KG = False
25
- >>>>>>> ba5a1f4 (Adding kg to deployment)
26
 
27
 
28
  def load_documents(data_dir: str):
@@ -72,8 +69,6 @@ def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int)
72
 
73
  os.makedirs(persist_dir, exist_ok=True)
74
 
75
- <<<<<<< HEAD
76
- =======
77
  # Prepare KG store and local chunk index
78
  chunks_index = {}
79
  kg_path = os.path.join(persist_dir, "kg_store.ttl")
@@ -125,15 +120,12 @@ def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int)
125
  pass
126
 
127
  # Persist Chroma vectorstore
128
- >>>>>>> ba5a1f4 (Adding kg to deployment)
129
  Chroma.from_documents(
130
  split_docs,
131
  embedding=embeddings,
132
  persist_directory=persist_dir,
133
  )
134
  print(f"Vectorstore built and persisted to {persist_dir}")
135
- <<<<<<< HEAD
136
- =======
137
 
138
  # Persist chunks index for runtime (simple json mapping)
139
  try:
@@ -150,20 +142,14 @@ def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int)
150
  print(f"KG persisted to {kg_path}")
151
  except Exception:
152
  pass
153
- >>>>>>> ba5a1f4 (Adding kg to deployment)
154
 
155
 
156
  def main():
157
  parser = argparse.ArgumentParser()
158
  parser.add_argument("--data-dir", type=str, default="./data")
159
  parser.add_argument("--persist-dir", type=str, default="./vectorstore")
160
- <<<<<<< HEAD
161
- parser.add_argument("--chunk-size", type=int, default=800)
162
- parser.add_argument("--chunk-overlap", type=int, default=200)
163
- =======
164
  parser.add_argument("--chunk-size", type=int, default=200)
165
  parser.add_argument("--chunk-overlap", type=int, default=50)
166
- >>>>>>> ba5a1f4 (Adding kg to deployment)
167
  args = parser.parse_args()
168
 
169
  ingest(
 
1
  import argparse
2
  import os
3
 
 
4
  from langchain_community.document_loaders import DirectoryLoader, TextLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import Chroma
7
  from langchain_community.embeddings import OpenAIEmbeddings
 
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain_community.vectorstores import Chroma
10
  from langchain_community.embeddings import OpenAIEmbeddings
 
20
  _HAS_KG = True
21
  except Exception:
22
  _HAS_KG = False
 
23
 
24
 
25
  def load_documents(data_dir: str):
 
69
 
70
  os.makedirs(persist_dir, exist_ok=True)
71
 
 
 
72
  # Prepare KG store and local chunk index
73
  chunks_index = {}
74
  kg_path = os.path.join(persist_dir, "kg_store.ttl")
 
120
  pass
121
 
122
  # Persist Chroma vectorstore
 
123
  Chroma.from_documents(
124
  split_docs,
125
  embedding=embeddings,
126
  persist_directory=persist_dir,
127
  )
128
  print(f"Vectorstore built and persisted to {persist_dir}")
 
 
129
 
130
  # Persist chunks index for runtime (simple json mapping)
131
  try:
 
142
  print(f"KG persisted to {kg_path}")
143
  except Exception:
144
  pass
 
145
 
146
 
147
  def main():
148
  parser = argparse.ArgumentParser()
149
  parser.add_argument("--data-dir", type=str, default="./data")
150
  parser.add_argument("--persist-dir", type=str, default="./vectorstore")
 
 
 
 
151
  parser.add_argument("--chunk-size", type=int, default=200)
152
  parser.add_argument("--chunk-overlap", type=int, default=50)
 
153
  args = parser.parse_args()
154
 
155
  ingest(
src/utils/__pycache__/rag_runtime.cpython-310.pyc CHANGED
Binary files a/src/utils/__pycache__/rag_runtime.cpython-310.pyc and b/src/utils/__pycache__/rag_runtime.cpython-310.pyc differ
 
src/utils/rag_runtime.py CHANGED
@@ -6,8 +6,6 @@ import streamlit as st
6
 
7
  from src.vectorstore import get_retriever
8
  from src.qa_chain import make_conversational_chain
9
- <<<<<<< HEAD
10
- =======
11
  import os
12
  import json
13
  from typing import Dict, List, Tuple
@@ -18,7 +16,6 @@ try:
18
  _HAS_KG = True
19
  except Exception:
20
  _HAS_KG = False
21
- >>>>>>> ba5a1f4 (Adding kg to deployment)
22
 
23
 
24
  def run_ingest_cli(data_dir: str, persist_dir: str) -> None:
@@ -31,10 +28,7 @@ def run_ingest_cli(data_dir: str, persist_dir: str) -> None:
31
  Raises:
32
  CalledProcessError: If the underlying subprocess fails.
33
  """
34
- <<<<<<< HEAD
35
- =======
36
- # Updated to point to the CLI module inside the ingest package
37
- >>>>>>> ba5a1f4 (Adding kg to deployment)
38
  cmd = [
39
  sys.executable,
40
  "-m",
@@ -46,9 +40,6 @@ def run_ingest_cli(data_dir: str, persist_dir: str) -> None:
46
  ]
47
  subprocess.run(cmd, check=True)
48
 
49
-
50
- <<<<<<< HEAD
51
- =======
52
  def _load_chunks_index(persist_dir: str) -> Dict[str, Dict]:
53
  idx_path = os.path.join(persist_dir, "chunks_index.json")
54
  if not os.path.exists(idx_path):
@@ -107,7 +98,6 @@ def answer_with_kg(
107
  return chain({"question": augmented_question, "chat_history": chat_history})
108
 
109
 
110
- >>>>>>> ba5a1f4 (Adding kg to deployment)
111
  @st.cache_resource(show_spinner=False)
112
  def build_or_load_retriever_cached(
113
  data_dir: str,
 
6
 
7
  from src.vectorstore import get_retriever
8
  from src.qa_chain import make_conversational_chain
 
 
9
  import os
10
  import json
11
  from typing import Dict, List, Tuple
 
16
  _HAS_KG = True
17
  except Exception:
18
  _HAS_KG = False
 
19
 
20
 
21
  def run_ingest_cli(data_dir: str, persist_dir: str) -> None:
 
28
  Raises:
29
  CalledProcessError: If the underlying subprocess fails.
30
  """
31
+
 
 
 
32
  cmd = [
33
  sys.executable,
34
  "-m",
 
40
  ]
41
  subprocess.run(cmd, check=True)
42
 
 
 
 
43
  def _load_chunks_index(persist_dir: str) -> Dict[str, Dict]:
44
  idx_path = os.path.join(persist_dir, "chunks_index.json")
45
  if not os.path.exists(idx_path):
 
98
  return chain({"question": augmented_question, "chat_history": chat_history})
99
 
100
 
 
101
  @st.cache_resource(show_spinner=False)
102
  def build_or_load_retriever_cached(
103
  data_dir: str,
src/vectorstore.py CHANGED
@@ -51,11 +51,8 @@ class HybridRetriever(BaseRetriever):
51
  def get_retriever(
52
  persist_dir: str,
53
  top_k: int,
54
- <<<<<<< HEAD
55
- retrieval_mode: RetrievalMode = "mmr",
56
- =======
57
- retrieval_mode: RetrievalMode = "hybrid",
58
- >>>>>>> ba5a1f4 (Adding kg to deployment)
59
  ):
60
  db = get_vectorstore(persist_dir=persist_dir)
61
  mode = retrieval_mode.lower()
 
51
  def get_retriever(
52
  persist_dir: str,
53
  top_k: int,
54
+ retrieval_mode: RetrievalMode = "hybrid"
55
+
 
 
 
56
  ):
57
  db = get_vectorstore(persist_dir=persist_dir)
58
  mode = retrieval_mode.lower()