kamkol committed on
Commit
f0d359c
·
1 Parent(s): 5b2b3fe

Add debugging logging

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. Dockerfile +5 -1
  3. app/app.py +48 -20
  4. app/debug.py +67 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
Dockerfile CHANGED
@@ -19,6 +19,10 @@ COPY app/ ./app/
19
  COPY data/processed_data/chunks.pkl ./data/processed_data/
20
  COPY data/processed_data/embedded_docs.pkl ./data/processed_data/
21
 
 
 
 
22
  # Set the entry point to run the Streamlit app
 
23
  EXPOSE 8501
24
- CMD ["streamlit", "run", "app/app.py", "--server.address=0.0.0.0", "--server.port=8501"]
 
19
  COPY data/processed_data/chunks.pkl ./data/processed_data/
20
  COPY data/processed_data/embedded_docs.pkl ./data/processed_data/
21
 
22
+ # Enable more verbose logging
23
+ ENV PYTHONUNBUFFERED=1
24
+
25
  # Set the entry point to run the Streamlit app
26
+ # Use debug.py to troubleshoot if the main app fails
27
  EXPOSE 8501
28
+ CMD ["streamlit", "run", "app/app.py", "--server.address=0.0.0.0", "--server.port=8501", "--logger.level=debug"]
app/app.py CHANGED
@@ -103,26 +103,54 @@ def find_processed_data():
103
  @st.cache_resource
104
  def initialize_vectorstore():
105
  """Initialize the vectorstore from processed data"""
106
- processed_data_path = find_processed_data()
107
- st.write(f"Using processed data from: {processed_data_path}")
108
-
109
- # Load chunks for reference
110
- chunks_path = os.path.join(processed_data_path, "chunks.pkl")
111
- with open(chunks_path, "rb") as f:
112
- chunks = pickle.load(f)
113
-
114
- # Load embedded docs
115
- embedded_docs_path = os.path.join(processed_data_path, "embedded_docs.pkl")
116
- with open(embedded_docs_path, "rb") as f:
117
- embedded_docs = pickle.load(f)
118
-
119
- # Initialize embedding model
120
- embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
121
-
122
- # Create custom vectorstore
123
- vectorstore = CustomVectorStore(embedded_docs, embedding_model)
124
-
125
- return vectorstore, chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Define prompts
128
  RAG_PROMPT = """
 
103
  @st.cache_resource
104
  def initialize_vectorstore():
105
  """Initialize the vectorstore from processed data"""
106
+ try:
107
+ processed_data_path = find_processed_data()
108
+ st.write(f"Using processed data from: {processed_data_path}")
109
+
110
+ # Load chunks for reference
111
+ chunks_path = os.path.join(processed_data_path, "chunks.pkl")
112
+ st.write(f"Attempting to load chunks from: {chunks_path}")
113
+ try:
114
+ with open(chunks_path, "rb") as f:
115
+ chunks = pickle.load(f)
116
+ st.write(f"Successfully loaded chunks: {len(chunks)} items")
117
+ except Exception as e:
118
+ st.error(f"Error loading chunks.pkl: {str(e)}")
119
+ chunks = []
120
+
121
+ # Load embedded docs
122
+ embedded_docs_path = os.path.join(processed_data_path, "embedded_docs.pkl")
123
+ st.write(f"Attempting to load embedded docs from: {embedded_docs_path}")
124
+ try:
125
+ with open(embedded_docs_path, "rb") as f:
126
+ embedded_docs = pickle.load(f)
127
+ st.write(f"Successfully loaded embedded docs: {len(embedded_docs)} items")
128
+ except Exception as e:
129
+ st.error(f"Error loading embedded_docs.pkl: {str(e)}")
130
+ embedded_docs = []
131
+
132
+ if not chunks or not embedded_docs:
133
+ st.warning("Using empty vectorstore as fallback due to loading errors")
134
+ # Return empty vectorstore as fallback
135
+ embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
136
+ vectorstore = CustomVectorStore([], embedding_model)
137
+ return vectorstore, []
138
+
139
+ # Initialize embedding model
140
+ try:
141
+ embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
142
+ st.write("Successfully initialized OpenAI embeddings model")
143
+ except Exception as e:
144
+ st.error(f"Error initializing OpenAI embeddings model: {str(e)}")
145
+ raise
146
+
147
+ # Create custom vectorstore
148
+ vectorstore = CustomVectorStore(embedded_docs, embedding_model)
149
+
150
+ return vectorstore, chunks
151
+ except Exception as e:
152
+ st.error(f"Error in vectorstore initialization: {str(e)}")
153
+ raise
154
 
155
  # Define prompts
156
  RAG_PROMPT = """
app/debug.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pickle
4
+ from pathlib import Path
5
+
6
+ st.title("Debug App")
7
+
8
+ # Check environment variables
9
+ st.write("## Environment Variables")
10
+ if os.environ.get("OPENAI_API_KEY"):
11
+ st.write("✅ OPENAI_API_KEY is set")
12
+ else:
13
+ st.error("❌ OPENAI_API_KEY is not set")
14
+
15
+ # Try to find data directory
16
+ st.write("## Data Directory")
17
+ possible_paths = [
18
+ "data/processed_data",
19
+ "app/data/processed_data",
20
+ "/data/processed_data",
21
+ "/app/data/processed_data"
22
+ ]
23
+
24
+ for path in possible_paths:
25
+ if os.path.exists(path):
26
+ st.write(f"✅ Found data directory at: {path}")
27
+
28
+ # Check for pickle files
29
+ chunks_path = os.path.join(path, "chunks.pkl")
30
+ if os.path.exists(chunks_path):
31
+ st.write(f"✅ Found chunks.pkl: {os.path.getsize(chunks_path) / (1024*1024):.2f} MB")
32
+
33
+ # Try to load
34
+ try:
35
+ with open(chunks_path, "rb") as f:
36
+ chunks = pickle.load(f)
37
+ st.write(f"✅ Successfully loaded chunks: {len(chunks)} items")
38
+ except Exception as e:
39
+ st.error(f"❌ Error loading chunks.pkl: {str(e)}")
40
+ else:
41
+ st.error(f"❌ chunks.pkl not found in {path}")
42
+
43
+ embedded_docs_path = os.path.join(path, "embedded_docs.pkl")
44
+ if os.path.exists(embedded_docs_path):
45
+ st.write(f"✅ Found embedded_docs.pkl: {os.path.getsize(embedded_docs_path) / (1024*1024):.2f} MB")
46
+
47
+ # Try to load
48
+ try:
49
+ with open(embedded_docs_path, "rb") as f:
50
+ embedded_docs = pickle.load(f)
51
+ st.write(f"✅ Successfully loaded embedded_docs: {len(embedded_docs)} items")
52
+ except Exception as e:
53
+ st.error(f"❌ Error loading embedded_docs.pkl: {str(e)}")
54
+ else:
55
+ st.error(f"❌ embedded_docs.pkl not found in {path}")
56
+
57
+ break
58
+ else:
59
+ st.error("❌ Could not find data directory")
60
+
61
+ st.write("## System Info")
62
+ import sys
63
+ st.write(f"Python version: {sys.version}")
64
+ st.write(f"Working directory: {os.getcwd()}")
65
+ st.write(f"Directory contents: {os.listdir('.')}")
66
+
67
+ st.write("Debug complete.")