vitalune committed on
Commit
e9b2b45
·
verified ·
1 Parent(s): c112e54

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -58
src/streamlit_app.py CHANGED
@@ -17,8 +17,8 @@ load_dotenv()
17
  LLM_MODEL = "gpt-5-nano-2025-08-07"
18
  EMBEDDING_MODEL = "text-embedding-3-small"
19
  TEMPERATURE = 0.1
20
- DATA_DIR = "data"
21
- PERSIST_DIR = "./storage"
22
 
23
  # System prompt configuration
24
  # This can be customized to change the chatbot's behavior and personality
@@ -37,37 +37,10 @@ st.set_page_config(
37
  layout="centered"
38
  )
39
 
40
- # Helper function to get API keys from multiple sources
41
- def get_api_key(key_name: str) -> str:
42
- """
43
- Get API key from multiple sources in priority order:
44
- 1. Environment variables (works for local dev, Docker, and Hugging Face Spaces)
45
- 2. Streamlit secrets (works for Streamlit Cloud)
46
-
47
- Hugging Face Spaces: Set secrets in Space Settings > Repository secrets
48
- Streamlit Cloud: Set secrets in App Settings > Secrets
49
- Local dev: Use .env file or export environment variables
50
- """
51
- # Try environment variable first (highest priority)
52
- api_key = os.getenv(key_name)
53
- if api_key:
54
- return api_key
55
-
56
- # Try Streamlit secrets as fallback
57
- try:
58
- if key_name in st.secrets:
59
- return st.secrets[key_name]
60
- except (FileNotFoundError, KeyError):
61
- pass
62
-
63
- return None
64
-
65
- # Get API keys from environment variables or Streamlit secrets
66
- # For Hugging Face Spaces: Add these as secrets in your Space settings
67
- # For Streamlit Cloud: Add these in the app secrets
68
- # For local development: Use .env file
69
- openai_api_key = get_api_key('OPENAI_API_KEY')
70
- llama_cloud_api_key = get_api_key('LLAMA_CLOUD_API_KEY')
71
 
72
  # Initialize chat history
73
  if "messages" not in st.session_state:
@@ -78,7 +51,6 @@ def load_documents_with_llamaparse(data_dir: str, llama_api_key: str) -> List[Do
78
  """
79
  Load documents from data directory using LlamaParse for complex file types
80
  and SimpleDirectoryReader for basic text files.
81
-
82
  Supported complex file types: PDF, DOCX, PPTX, XLSX
83
  """
84
  data_path = Path(data_dir)
@@ -162,7 +134,7 @@ def load_documents_with_llamaparse(data_dir: str, llama_api_key: str) -> List[Do
162
 
163
  # Initialize query engine
164
  @st.cache_resource
165
- def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
166
  """Initialize the LlamaIndex query engine with caching"""
167
 
168
  # Set API keys
@@ -171,11 +143,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
171
  os.environ['LLAMA_CLOUD_API_KEY'] = _llama_api_key
172
 
173
  # Configure models with backend configuration
174
- llm = OpenAI(
175
- model=LLM_MODEL,
176
- temperature=TEMPERATURE,
177
- system_prompt=_system_prompt
178
- )
179
  embed_model = OpenAIEmbedding(model=EMBEDDING_MODEL)
180
 
181
  try:
@@ -203,7 +171,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
203
  )
204
  # Store for later
205
  index.storage_context.persist(persist_dir=PERSIST_DIR)
206
- status = f"Index created with {len(documents)} documents"
207
  else:
208
  # Load existing index
209
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
@@ -213,7 +181,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
213
  # This ensures the query engine uses the correct models
214
  index._llm = llm
215
  index._embed_model = embed_model
216
- status = "Index loaded from storage"
217
 
218
  # Create query engine
219
  query_engine = index.as_query_engine(llm=llm, embed_model=embed_model)
@@ -224,16 +192,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
224
 
225
  # Main chat interface
226
  if not openai_api_key:
227
- st.error("⚠️ OPENAI_API_KEY is required to run CatBot")
228
- st.info("""
229
- **How to set the API key:**
230
-
231
- - **Hugging Face Spaces**: Go to Settings → Repository secrets → Add `OPENAI_API_KEY`
232
- - **Local Development**: Create a `.env` file with `OPENAI_API_KEY=your_key_here`
233
- - **Streamlit Cloud**: Add to App Settings → Secrets
234
-
235
- Get your OpenAI API key from: https://platform.openai.com/api-keys
236
- """)
237
  st.stop()
238
 
239
  # Display info about LlamaParse availability
@@ -243,11 +202,7 @@ if not llama_cloud_api_key:
243
  # Initialize query engine
244
  if "query_engine" not in st.session_state:
245
  with st.spinner("Initializing RAG agent..."):
246
- query_engine, status = initialize_query_engine(
247
- openai_api_key,
248
- llama_cloud_api_key,
249
- SYSTEM_PROMPT
250
- )
251
  st.session_state.query_engine = query_engine
252
 
253
  if query_engine is None:
@@ -290,4 +245,4 @@ if prompt := st.chat_input("Ask a question about your documents"):
290
  st.session_state.messages.append({
291
  "role": "assistant",
292
  "content": error_msg
293
- })
 
17
  LLM_MODEL = "gpt-5-nano-2025-08-07"
18
  EMBEDDING_MODEL = "text-embedding-3-small"
19
  TEMPERATURE = 0.1
20
+ DATA_DIR = "src/data"
21
+ PERSIST_DIR = "src/storage"
22
 
23
  # System prompt configuration
24
  # This can be customized to change the chatbot's behavior and personality
 
37
  layout="centered"
38
  )
39
 
40
+ # Get API keys from environment variable or Streamlit secrets
41
+ # These should be set before running the Streamlit app
42
+ openai_api_key = os.getenv('OPENAI_API_KEY') or st.secrets.get("OPENAI_API_KEY")
43
+ llama_cloud_api_key = os.getenv('LLAMA_CLOUD_API_KEY') or st.secrets.get("LLAMA_CLOUD_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  # Initialize chat history
46
  if "messages" not in st.session_state:
 
51
  """
52
  Load documents from data directory using LlamaParse for complex file types
53
  and SimpleDirectoryReader for basic text files.
 
54
  Supported complex file types: PDF, DOCX, PPTX, XLSX
55
  """
56
  data_path = Path(data_dir)
 
134
 
135
  # Initialize query engine
136
  @st.cache_resource
137
+ def initialize_query_engine(_openai_api_key, _llama_api_key):
138
  """Initialize the LlamaIndex query engine with caching"""
139
 
140
  # Set API keys
 
143
  os.environ['LLAMA_CLOUD_API_KEY'] = _llama_api_key
144
 
145
  # Configure models with backend configuration
146
+ llm = OpenAI(model=LLM_MODEL, temperature=TEMPERATURE)
 
 
 
 
147
  embed_model = OpenAIEmbedding(model=EMBEDDING_MODEL)
148
 
149
  try:
 
171
  )
172
  # Store for later
173
  index.storage_context.persist(persist_dir=PERSIST_DIR)
174
+ status = f"Index created with {len(documents)} documents"
175
  else:
176
  # Load existing index
177
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
 
181
  # This ensures the query engine uses the correct models
182
  index._llm = llm
183
  index._embed_model = embed_model
184
+ status = "Index loaded from storage"
185
 
186
  # Create query engine
187
  query_engine = index.as_query_engine(llm=llm, embed_model=embed_model)
 
192
 
193
  # Main chat interface
194
  if not openai_api_key:
195
+ st.warning("⚠️ Please set the OPENAI_API_KEY environment variable to get started.")
 
 
 
 
 
 
 
 
 
196
  st.stop()
197
 
198
  # Display info about LlamaParse availability
 
202
  # Initialize query engine
203
  if "query_engine" not in st.session_state:
204
  with st.spinner("Initializing RAG agent..."):
205
+ query_engine, status = initialize_query_engine(openai_api_key, llama_cloud_api_key)
 
 
 
 
206
  st.session_state.query_engine = query_engine
207
 
208
  if query_engine is None:
 
245
  st.session_state.messages.append({
246
  "role": "assistant",
247
  "content": error_msg
248
+ })