cmd0160 committed on
Commit
263fa05
·
1 Parent(s): e7113d1

addressing auto ingest

Browse files
Files changed (1) hide show
  1. app.py +21 -30
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
 
3
  os.environ.setdefault("LANGCHAIN_TELEMETRY_ENABLED", "false")
4
  os.environ.setdefault("LANGCHAIN_DISABLE_TELEMETRY", "true")
@@ -7,7 +9,6 @@ os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "false")
7
  import streamlit as st
8
  from src.vectorstore import get_retriever
9
  from src.qa_chain import make_conversational_chain
10
- from src.ingest import ingest as run_ingest
11
 
12
  st.set_page_config(page_title="Abalone RAG Chatbot", page_icon="🐚")
13
 
@@ -35,22 +36,6 @@ top_k = st.sidebar.slider(
35
  data_dir = st.sidebar.text_input("Data directory", value="./data")
36
  persist_dir = st.sidebar.text_input("Vectorstore directory", value="./vectorstore")
37
 
38
- chunk_size = st.sidebar.number_input(
39
- "Chunk size",
40
- min_value=200,
41
- max_value=4000,
42
- value=1000,
43
- step=100,
44
- )
45
-
46
- chunk_overlap = st.sidebar.number_input(
47
- "Chunk overlap",
48
- min_value=0,
49
- max_value=1000,
50
- value=200,
51
- step=50,
52
- )
53
-
54
  st.sidebar.markdown("---")
55
  st.sidebar.caption(
56
  "If the vectorstore is missing or invalid, the app will attempt to ingest "
@@ -60,32 +45,34 @@ st.sidebar.caption(
60
  if "chat_history" not in st.session_state:
61
  st.session_state["chat_history"] = []
62
 
63
- if "retriever_initialized" not in st.session_state:
64
- st.session_state["retriever_initialized"] = False
65
-
66
  def ensure_openai_key() -> bool:
67
  if not os.environ.get("OPENAI_API_KEY"):
68
  st.error("OPENAI_API_KEY is not set.")
69
  return False
70
  return True
71
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  @st.cache_resource(show_spinner=False)
73
  def build_or_load_retriever_cached(
74
  data_dir: str,
75
  persist_dir: str,
76
  top_k: int,
77
- chunk_size: int,
78
- chunk_overlap: int,
79
  ):
80
  try:
81
  return get_retriever(persist_dir=persist_dir, top_k=top_k)
82
  except Exception:
83
- run_ingest(
84
- data_dir=data_dir,
85
- persist_dir=persist_dir,
86
- chunk_size=chunk_size,
87
- chunk_overlap=chunk_overlap,
88
- )
89
  return get_retriever(persist_dir=persist_dir, top_k=top_k)
90
 
91
  def get_or_build_retriever_with_ui():
@@ -96,9 +83,13 @@ def get_or_build_retriever_with_ui():
96
  data_dir=data_dir,
97
  persist_dir=persist_dir,
98
  top_k=top_k,
99
- chunk_size=chunk_size,
100
- chunk_overlap=chunk_overlap,
101
  )
 
 
 
 
 
 
102
  except Exception as e:
103
  st.error(
104
  "Could not initialize vectorstore.\n\n"
 
1
  import os
2
+ import sys
3
+ import subprocess
4
 
5
  os.environ.setdefault("LANGCHAIN_TELEMETRY_ENABLED", "false")
6
  os.environ.setdefault("LANGCHAIN_DISABLE_TELEMETRY", "true")
 
9
  import streamlit as st
10
  from src.vectorstore import get_retriever
11
  from src.qa_chain import make_conversational_chain
 
12
 
13
  st.set_page_config(page_title="Abalone RAG Chatbot", page_icon="🐚")
14
 
 
36
  data_dir = st.sidebar.text_input("Data directory", value="./data")
37
  persist_dir = st.sidebar.text_input("Vectorstore directory", value="./vectorstore")
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  st.sidebar.markdown("---")
40
  st.sidebar.caption(
41
  "If the vectorstore is missing or invalid, the app will attempt to ingest "
 
45
  if "chat_history" not in st.session_state:
46
  st.session_state["chat_history"] = []
47
 
 
 
 
48
  def ensure_openai_key() -> bool:
49
  if not os.environ.get("OPENAI_API_KEY"):
50
  st.error("OPENAI_API_KEY is not set.")
51
  return False
52
  return True
53
 
54
+ def run_ingest_cli(data_dir: str, persist_dir: str):
55
+ cmd = [
56
+ sys.executable,
57
+ "-m",
58
+ "src.ingest",
59
+ "--data-dir",
60
+ data_dir,
61
+ "--persist-dir",
62
+ persist_dir,
63
+ ]
64
+ subprocess.run(cmd, check=True)
65
+
66
  @st.cache_resource(show_spinner=False)
67
  def build_or_load_retriever_cached(
68
  data_dir: str,
69
  persist_dir: str,
70
  top_k: int,
 
 
71
  ):
72
  try:
73
  return get_retriever(persist_dir=persist_dir, top_k=top_k)
74
  except Exception:
75
+ run_ingest_cli(data_dir=data_dir, persist_dir=persist_dir)
 
 
 
 
 
76
  return get_retriever(persist_dir=persist_dir, top_k=top_k)
77
 
78
  def get_or_build_retriever_with_ui():
 
83
  data_dir=data_dir,
84
  persist_dir=persist_dir,
85
  top_k=top_k,
 
 
86
  )
87
+ except subprocess.CalledProcessError as e:
88
+ st.error(
89
+ "Ingestion failed while building the vectorstore.\n\n"
90
+ f"Command exit code: {e.returncode}"
91
+ )
92
+ return None
93
  except Exception as e:
94
  st.error(
95
  "Could not initialize vectorstore.\n\n"