Update src/streamlit_app.py

src/streamlit_app.py (+13 -58)
@@ -17,8 +17,8 @@ load_dotenv()
 LLM_MODEL = "gpt-5-nano-2025-08-07"
 EMBEDDING_MODEL = "text-embedding-3-small"
 TEMPERATURE = 0.1
-DATA_DIR = "data"
-PERSIST_DIR = "storage"
+DATA_DIR = "src/data"
+PERSIST_DIR = "src/storage"
 
 # System prompt configuration
 # This can be customized to change the chatbot's behavior and personality
@@ -37,37 +37,10 @@ st.set_page_config(
     layout="centered"
 )
 
-#
-def get_api_key(key_name):
-    """
-    Get an API key from one of:
-    1. Environment variables (works for local dev, Docker, and Hugging Face Spaces)
-    2. Streamlit secrets (works for Streamlit Cloud)
-
-    Hugging Face Spaces: Set secrets in Space Settings > Repository secrets
-    Streamlit Cloud: Set secrets in App Settings > Secrets
-    Local dev: Use .env file or export environment variables
-    """
-    # Try environment variable first (highest priority)
-    api_key = os.getenv(key_name)
-    if api_key:
-        return api_key
-
-    # Try Streamlit secrets as fallback
-    try:
-        if key_name in st.secrets:
-            return st.secrets[key_name]
-    except (FileNotFoundError, KeyError):
-        pass
-
-    return None
-
-# Get API keys from environment variables or Streamlit secrets
-# For Hugging Face Spaces: Add these as secrets in your Space settings
-# For Streamlit Cloud: Add these in the app secrets
-# For local development: Use .env file
-openai_api_key = get_api_key('OPENAI_API_KEY')
-llama_cloud_api_key = get_api_key('LLAMA_CLOUD_API_KEY')
+# Get API keys from environment variable or Streamlit secrets
+# These should be set before running the Streamlit app
+openai_api_key = os.getenv('OPENAI_API_KEY') or st.secrets.get("OPENAI_API_KEY")
+llama_cloud_api_key = os.getenv('LLAMA_CLOUD_API_KEY') or st.secrets.get("LLAMA_CLOUD_API_KEY")
 
 # Initialize chat history
 if "messages" not in st.session_state:
@@ -78,7 +51,6 @@ def load_documents_with_llamaparse(data_dir: str, llama_api_key: str) -> List[Document]:
     """
     Load documents from data directory using LlamaParse for complex file types
     and SimpleDirectoryReader for basic text files.
-
     Supported complex file types: PDF, DOCX, PPTX, XLSX
     """
     data_path = Path(data_dir)
@@ -162,7 +134,7 @@ def load_documents_with_llamaparse(data_dir: str, llama_api_key: str) -> List[Document]:
 
 # Initialize query engine
 @st.cache_resource
-def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
+def initialize_query_engine(_openai_api_key, _llama_api_key):
     """Initialize the LlamaIndex query engine with caching"""
 
     # Set API keys
@@ -171,11 +143,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
     os.environ['LLAMA_CLOUD_API_KEY'] = _llama_api_key
 
     # Configure models with backend configuration
-    llm = OpenAI(
-        model=LLM_MODEL,
-        temperature=TEMPERATURE,
-        system_prompt=_system_prompt
-    )
+    llm = OpenAI(model=LLM_MODEL, temperature=TEMPERATURE)
    embed_model = OpenAIEmbedding(model=EMBEDDING_MODEL)
 
     try:
@@ -203,7 +171,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
             )
             # Store for later
             index.storage_context.persist(persist_dir=PERSIST_DIR)
-            status = f"Index created with {len(documents)} documents"
+            status = f"✅ Index created with {len(documents)} documents"
         else:
             # Load existing index
             storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
@@ -213,7 +181,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
             # This ensures the query engine uses the correct models
             index._llm = llm
             index._embed_model = embed_model
-            status = "Index loaded from storage"
+            status = "✅ Index loaded from storage"
 
         # Create query engine
         query_engine = index.as_query_engine(llm=llm, embed_model=embed_model)
@@ -224,16 +192,7 @@ def initialize_query_engine(_openai_api_key, _llama_api_key, _system_prompt):
 
 # Main chat interface
 if not openai_api_key:
-    st.
-    st.info("""
-    **How to set the API key:**
-
-    - **Hugging Face Spaces**: Go to Settings → Repository secrets → Add `OPENAI_API_KEY`
-    - **Local Development**: Create a `.env` file with `OPENAI_API_KEY=your_key_here`
-    - **Streamlit Cloud**: Add to App Settings → Secrets
-
-    Get your OpenAI API key from: https://platform.openai.com/api-keys
-    """)
+    st.warning("⚠️ Please set the OPENAI_API_KEY environment variable to get started.")
     st.stop()
 
 # Display info about LlamaParse availability
@@ -243,11 +202,7 @@ if not llama_cloud_api_key:
 # Initialize query engine
 if "query_engine" not in st.session_state:
     with st.spinner("Initializing RAG agent..."):
-        query_engine, status = initialize_query_engine(
-            openai_api_key,
-            llama_cloud_api_key,
-            SYSTEM_PROMPT
-        )
+        query_engine, status = initialize_query_engine(openai_api_key, llama_cloud_api_key)
     st.session_state.query_engine = query_engine
 
 if query_engine is None:
@@ -290,4 +245,4 @@ if prompt := st.chat_input("Ask a question about your documents"):
     st.session_state.messages.append({
         "role": "assistant",
         "content": error_msg
-    })
+        })
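
A note on the simplified key lookup: the one-liner `os.getenv(...) or st.secrets.get(...)` drops the try/except guard the deleted `get_api_key` helper had. On deployments with no secrets.toml at all (common for local runs and Docker), merely touching `st.secrets` can raise the same `FileNotFoundError` the old code caught, depending on the Streamlit version. A minimal sketch that keeps the one-line spirit but restores the guard; `get_key` is a hypothetical helper name:

import os
from typing import Optional

import streamlit as st

def get_key(name: str) -> Optional[str]:
    """Environment variable first, then Streamlit secrets, mirroring the old helper."""
    value = os.getenv(name)
    if value:
        return value
    try:
        # Accessing st.secrets without a secrets.toml can raise FileNotFoundError
        # on some Streamlit versions, so keep the old guard.
        return st.secrets.get(name)
    except FileNotFoundError:
        return None

openai_api_key = get_key("OPENAI_API_KEY")
llama_cloud_api_key = get_key("LLAMA_CLOUD_API_KEY")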
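
Two smaller observations on `initialize_query_engine`: the leading underscores on `_openai_api_key` and `_llama_api_key` tell `@st.cache_resource` not to hash those arguments when building the cache key, so the keys never enter Streamlit's cache. And since both the `_system_prompt` parameter and the `system_prompt=` argument to `OpenAI(...)` were removed, `SYSTEM_PROMPT` from the configuration block appears to no longer reach the model after this commit. If that is unintended, LlamaIndex's `OpenAI` wrapper still accepts the keyword the old code used; a sketch with an illustrative placeholder prompt (import path per recent llama-index releases):

from llama_index.llms.openai import OpenAI

LLM_MODEL = "gpt-5-nano-2025-08-07"
TEMPERATURE = 0.1
SYSTEM_PROMPT = "Answer using only the indexed documents."  # placeholder, not the app's actual prompt

# Reattach the system prompt that the simplified one-line constructor drops.
llm = OpenAI(model=LLM_MODEL, temperature=TEMPERATURE, system_prompt=SYSTEM_PROMPT)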
|