Fix Streamlit Cloud deployment - add cloud mode support

- demo/pages/1_🔬_Live_Processing.py  +45 -18
- demo/pages/2_💬_Interactive_RAG.py  +194 -126
- demo/rag_config.py  +162 -95
- demo/requirements.txt  +67 -10
- demo/state_manager.py  +19 -3
demo/pages/1_🔬_Live_Processing.py
CHANGED
@@ -345,23 +345,43 @@ with col_status:
     ollama_ok, models = check_ollama()
     rag_system = get_unified_rag_system()
+    rag_mode = rag_system.get("mode", "error")
+
+    # Check for cloud providers
+    try:
+        from rag_config import check_cloud_providers
+        cloud_providers = check_cloud_providers()
+    except:
+        cloud_providers = {}

     status_cols = st.columns(2)
     with status_cols[0]:
         if ollama_ok:
             st.success(f"Ollama ({len(models)})")
+        elif cloud_providers:
+            st.info(f"Cloud ({len(cloud_providers)})")
         else:
+            st.warning("Demo Mode")
     with status_cols[1]:
         if rag_system["status"] == "ready":
             st.success("RAG Ready")
+        elif rag_mode == "cloud":
+            st.info("Cloud LLM")
+        elif rag_mode == "demo":
+            st.warning("Demo Mode")
         else:
             st.error("RAG Error")

     # State summary
     summary = state_manager.get_summary()
     st.metric("Processed Docs", summary["total_documents"])
+
+    # Show different metrics based on mode
+    if rag_mode == "cloud":
+        st.metric("Cloud Providers", len(cloud_providers))
+        st.caption("RAG indexing requires Ollama")
+    else:
+        st.metric("Indexed Chunks", summary["total_indexed_chunks"])

     st.markdown("---")

@@ -503,22 +523,29 @@ if file_to_process and st.button("🚀 Start Processing", type="primary", use_co
     # Stage 5: RAG Indexing
     indexed_count = 0
+    if auto_index and chunks:
+        if rag_system["status"] == "ready":
+            status_text.markdown("**📚 Indexing to RAG...**")
+            state_manager.update_processing(doc_id, "indexing", 0.9, "Indexing to RAG...")
+            progress_bar.progress(90)
+
+            # Auto-index
+            index_result = auto_index_processed_document(
+                doc_id=doc_id,
+                text=processing_result.get("raw_text", ""),
+                chunks=chunks,
+                metadata={"filename": filename, "source": file_to_process}
+            )
+
+            if index_result["success"]:
+                indexed_count = index_result["num_chunks"]
+                state_manager.mark_indexed(doc_id, indexed_count)
+        elif rag_mode == "cloud":
+            status_text.markdown("**☁️ Cloud mode - skipping RAG indexing...**")
+            state_manager.update_processing(doc_id, "indexing", 0.9, "Cloud mode - no indexing")
+            progress_bar.progress(90)
+            # In cloud mode, document is processed but not indexed
+            # Users can still query documents via cloud LLM

     # Complete
     progress_bar.progress(100)
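The status column above now falls back in three steps: Ollama when it answers, any configured cloud provider otherwise, and a demo badge when neither is available. A minimal sketch of that decision as a pure function (the function name and return shape are ours, for illustration only; the page inlines this logic):

    import streamlit as st  # only needed to render the badge

    def resolve_backend_badge(ollama_ok: bool, model_count: int, cloud_providers: dict) -> tuple:
        """Map backend probes to a (render_fn, label) badge, mirroring the hunk above."""
        if ollama_ok:
            return (st.success, f"Ollama ({model_count})")
        if cloud_providers:  # any cloud API key found
            return (st.info, f"Cloud ({len(cloud_providers)})")
        return (st.warning, "Demo Mode")

    # Usage: render_fn, label = resolve_backend_badge(False, 0, {"groq": True}); render_fn(label)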
demo/pages/2_💬_Interactive_RAG.py
CHANGED
@@ -354,6 +354,14 @@ rag_system = get_unified_rag_system()
 ollama_ok, models = check_ollama()
 stats = get_store_stats()
 indexed_docs = get_indexed_documents()
+rag_mode = rag_system.get("mode", "error")
+
+# Check for cloud providers
+try:
+    from rag_config import check_cloud_providers
+    cloud_providers = check_cloud_providers()
+except:
+    cloud_providers = {}

 # Session state
 if "messages" not in st.session_state:

@@ -393,25 +401,45 @@ cols = st.columns(5)
 with cols[0]:
     if ollama_ok:
         st.success(f"Ollama ({len(models)})")
+    elif cloud_providers:
+        st.info(f"Cloud ({len(cloud_providers)})")
     else:
+        st.warning("Demo Mode")
 with cols[1]:
     if rag_system["status"] == "ready":
         st.success("RAG Ready")
+    elif rag_mode == "cloud":
+        st.info("Cloud LLM")
+    elif rag_mode == "demo":
+        st.warning("Demo Mode")
     else:
         st.error("RAG Error")
 with cols[2]:
+    if rag_mode == "cloud" and cloud_providers:
+        provider_name = list(cloud_providers.keys())[0].title()
+        st.info(f"{provider_name}")
+    else:
+        st.info(f"{rag_system.get('llm_model', 'N/A').split(':')[0]}")
 with cols[3]:
     chunk_count = stats.get('total_chunks', 0)
     if chunk_count > 0:
         st.success(f"{chunk_count} Chunks")
+    elif rag_mode == "cloud":
+        st.info("Cloud Q&A")
     else:
         st.warning("0 Chunks")
 with cols[4]:
+    if rag_mode == "cloud":
+        st.info("Cloud Embed")
+    else:
+        st.info(f"{rag_system.get('embed_model', 'N/A').split(':')[0]}")
+
+# Show cloud mode message
+if rag_mode == "cloud":
+    st.info("☁️ **Cloud Mode**: Using cloud LLM providers for Q&A. Document indexing requires Ollama.")
+elif rag_mode == "demo":
+    st.warning("📋 **Demo Mode**: Add API keys to secrets.toml or start Ollama for full functionality.")
+elif rag_system["status"] == "error":
     with st.expander("RAG Error Details"):
         st.code(rag_system["error"])

@@ -616,8 +644,9 @@ else:
         st.markdown(pending_query)

     with st.chat_message("assistant"):
+        # Allow queries in both "ready" (Ollama) and "cloud" modes
+        if rag_system["status"] not in ["ready", "cloud"]:
+            st.error("RAG system not ready. Please configure Ollama or add cloud API keys.")
             st.session_state.messages.append({"role": "assistant", "content": "RAG system not ready"})
         else:
             # Show progress

@@ -645,49 +674,64 @@ else:
             st.error(f"Error: {error}")
             st.session_state.messages.append({"role": "assistant", "content": f"Error: {error}"})
         elif response:
+            # Handle both RAG response objects and cloud mode dict responses
+            is_cloud_response = isinstance(response, dict) and response.get("mode") == "cloud"
+
+            if is_cloud_response:
+                answer = response.get("answer", "")
+                st.markdown(answer)
+                st.info("☁️ *Response from cloud LLM (no document retrieval)*")
+
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": answer,
+                    "metadata": {"mode": "cloud"},
+                    "citations": [],
+                })
+            else:
+                # Display answer
+                st.markdown(response.answer)
+
+                # Build metadata
+                metadata = {
+                    "latency_ms": response.latency_ms,
+                    "num_sources": response.num_sources,
+                    "confidence": response.confidence,
+                    "validated": response.validated,
+                }
+
+                # Display metrics
+                if show_metrics:
+                    m_cols = st.columns(4)
+                    with m_cols[0]:
+                        st.markdown(f'<div class="metric-box"><div class="metric-value">{metadata.get("latency_ms", 0):.0f}ms</div><div class="metric-label">Latency</div></div>', unsafe_allow_html=True)
+                    with m_cols[1]:
+                        st.markdown(f'<div class="metric-box"><div class="metric-value">{metadata.get("num_sources", 0)}</div><div class="metric-label">Sources</div></div>', unsafe_allow_html=True)
+                    with m_cols[2]:
+                        conf = metadata.get("confidence", 0)
+                        color = "#4ECDC4" if conf > 0.6 else "#ffc107" if conf > 0.3 else "#dc3545"
+                        st.markdown(f'<div class="metric-box"><div class="metric-value" style="color:{color}">{conf:.0%}</div><div class="metric-label">Confidence</div></div>', unsafe_allow_html=True)
+                    with m_cols[3]:
+                        val = "✓" if metadata.get("validated") else "?"
+                        st.markdown(f'<div class="metric-box"><div class="metric-value">{val}</div><div class="metric-label">Validated</div></div>', unsafe_allow_html=True)
+
+                # Build citations list
+                citations = []
+                if hasattr(response, 'citations') and response.citations:
+                    for i, cite in enumerate(response.citations):
+                        citations.append({
+                            "index": i + 1,
+                            "text_snippet": cite.text_snippet if hasattr(cite, 'text_snippet') else str(cite),
+                            "relevance_score": cite.relevance_score if hasattr(cite, 'relevance_score') else 0.0,
+                        })
+
+                # Store message with metadata
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": response.answer,
+                    "metadata": metadata,
+                    "citations": citations,
+                })
         else:
             st.warning("No response from RAG system")
             st.session_state.messages.append({"role": "assistant", "content": "No response from RAG system"})

@@ -701,18 +745,23 @@ else:
         st.markdown(prompt)

     with st.chat_message("assistant"):
+        # Allow queries in both "ready" (Ollama) and "cloud" modes
+        if rag_system["status"] not in ["ready", "cloud"]:
+            st.error("RAG system not ready. Please configure Ollama or add cloud API keys.")
             st.session_state.messages.append({"role": "assistant", "content": "RAG system not ready"})
         else:
-            # Show progress
+            # Show progress - different stages for cloud vs RAG mode
            progress = st.progress(0)
            status = st.empty()

+            if rag_mode == "cloud":
+                stages = ["Processing", "Generating"]
+            else:
+                stages = ["Planning", "Retrieving", "Reranking", "Generating", "Validating"]
+
            for i, stage in enumerate(stages):
                status.markdown(f"**{stage}...**")
+                progress.progress(int((i + 1) * 100 / len(stages)))
                time.sleep(0.15)

            # Build filters for document

@@ -730,79 +779,94 @@ else:
             st.error(f"Error: {error}")
             st.session_state.messages.append({"role": "assistant", "content": f"Error: {error}"})
         elif response:
+            # Handle both RAG response objects and cloud mode dict responses
+            is_cloud_response = isinstance(response, dict) and response.get("mode") == "cloud"
+
+            if is_cloud_response:
+                answer = response.get("answer", "")
+                st.markdown(answer)
+                st.info("☁️ *Response from cloud LLM (no document retrieval)*")
+
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": answer,
+                    "metadata": {"mode": "cloud"},
+                    "citations": [],
+                })
+            else:
+                # Display answer
+                st.markdown(response.answer)
+
+                # Build metadata
+                metadata = {
+                    "latency_ms": response.latency_ms,
+                    "num_sources": response.num_sources,
+                    "confidence": response.confidence,
+                    "validated": response.validated,
+                }
+
+                # Display metrics
+                if show_metrics:
+                    m_cols = st.columns(4)
+                    with m_cols[0]:
+                        st.markdown(f'<div class="metric-box"><div class="metric-value">{response.latency_ms:.0f}ms</div><div class="metric-label">Latency</div></div>', unsafe_allow_html=True)
+                    with m_cols[1]:
+                        st.markdown(f'<div class="metric-box"><div class="metric-value">{response.num_sources}</div><div class="metric-label">Sources</div></div>', unsafe_allow_html=True)
+                    with m_cols[2]:
+                        conf_color = "#4ECDC4" if response.confidence > 0.6 else "#ffc107" if response.confidence > 0.3 else "#dc3545"
+                        st.markdown(f'<div class="metric-box"><div class="metric-value" style="color:{conf_color}">{response.confidence:.0%}</div><div class="metric-label">Confidence</div></div>', unsafe_allow_html=True)
+                    with m_cols[3]:
+                        val_icon = "✓" if response.validated else "?"
+                        st.markdown(f'<div class="metric-box"><div class="metric-value">{val_icon}</div><div class="metric-label">Validated</div></div>', unsafe_allow_html=True)
+
+                # Display sources
+                citations = []
+                if show_sources and response.citations:
+                    with st.expander(f"Sources ({len(response.citations)})"):
+                        for i, cite in enumerate(response.citations):
+                            color = get_chunk_color(i)
+                            citations.append({
+                                "index": cite.index,
+                                "relevance_score": cite.relevance_score,
+                                "text_snippet": cite.text_snippet,
+                            })
+                            st.markdown(f"""
+                            <div class="source-card" style="border-left-color: {color};">
+                                <div class="source-header">
+                                    <strong>[{cite.index}]</strong> • Relevance: {cite.relevance_score:.0%}
+                                </div>
+                                <div class="source-text">{cite.text_snippet[:300]}...</div>
                             </div>
+                            """, unsafe_allow_html=True)
+
+                # Chunk preview (semantic search) - only for non-cloud mode
+                if show_chunk_preview and not is_cloud_response:
+                    with st.expander("Chunk Preview (Top Matches)"):
+                        chunks = search_similar_chunks(
+                            prompt,
+                            top_k=5,
+                            doc_filter=st.session_state.doc_filter
+                        )
+                        for i, chunk in enumerate(chunks):
+                            sim = chunk.get("similarity", 0)
+                            color = "#4ECDC4" if sim > 0.7 else "#ffc107" if sim > 0.5 else "#8b949e"
+                            st.markdown(f"""
+                            <div class="chunk-preview" style="border-left: 3px solid {color};">
+                                <div style="font-size: 10px; color: #8b949e;">
+                                    Similarity: <span style="color: {color};">{sim:.0%}</span> |
+                                    Doc: {chunk.get('document_id', 'N/A')[:15]}...
+                                </div>
+                                <div style="margin-top: 4px;">{chunk.get('text', '')[:200]}...</div>
                             </div>
+                            """, unsafe_allow_html=True)

+                # Save to history
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": response.answer,
+                    "citations": citations,
+                    "metadata": metadata,
+                })

@@ -817,12 +881,14 @@ dynamic_questions = generate_dynamic_questions(state_manager, indexed_docs, max_

 # Display as clickable buttons
 sample_cols = st.columns(len(dynamic_questions))
+# Enable suggested questions in both RAG ready and cloud mode
+can_query = rag_system["status"] in ["ready", "cloud"]
 for i, q in enumerate(dynamic_questions):
     with sample_cols[i]:
         # Truncate long questions for button display
         display_q = q if len(q) <= 35 else q[:32] + "..."
         if st.button(display_q, key=f"sample_{i}", use_container_width=True,
+                     disabled=not can_query,
                      help=q if len(q) > 35 else None):
             st.session_state.messages.append({"role": "user", "content": q})
             st.rerun()

@@ -830,6 +896,8 @@ for i, q in enumerate(dynamic_questions):
 # Show hint about dynamic questions
 if stats.get('total_chunks', 0) > 0:
     st.caption("📌 Questions are generated based on your indexed documents")
+elif rag_mode == "cloud":
+    st.caption("☁️ Cloud mode - try asking any question")

 # Architecture info
 with st.expander("Multi-Agent RAG Architecture"):
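Both response branches above append assistant turns to st.session_state.messages as plain dicts: the cloud branch with metadata={"mode": "cloud"} and an empty citations list, the RAG branch with latency/confidence metadata and citation dicts. A history replay loop that relies only on that shape might look like this (our sketch; the page's actual replay loop is not part of this commit):

    import streamlit as st

    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
            meta = msg.get("metadata", {})           # absent on user messages
            if meta.get("mode") == "cloud":
                st.caption("☁️ cloud LLM answer (no retrieval)")
            elif "latency_ms" in meta:
                st.caption(f'{meta["latency_ms"]:.0f}ms • {meta.get("num_sources", 0)} sources')
            for cite in msg.get("citations", []):
                st.caption(f'[{cite["index"]}] {cite["text_snippet"][:80]}... ({cite["relevance_score"]:.0%})')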
demo/rag_config.py
CHANGED
@@ -3,11 +3,16 @@ Unified RAG Configuration for SPARKNET Demo

 This module provides a single source of truth for RAG system configuration,
 ensuring all demo pages use the same vector store, embeddings, and models.
+
+Supports both:
+1. Local Ollama (for on-premise deployments)
+2. Cloud LLM providers (for Streamlit Cloud)
 """

 import streamlit as st
 from pathlib import Path
 import sys
+import os

 PROJECT_ROOT = Path(__file__).parent.parent
 sys.path.insert(0, str(PROJECT_ROOT))
@@ -22,11 +27,21 @@ EMBEDDING_MODELS = ["nomic-embed-text", "mxbai-embed-large:latest", "mxbai-embed
 LLM_MODELS = ["llama3.2:latest", "llama3.1:8b", "mistral:latest", "qwen2.5:14b", "qwen2.5:32b"]


+def get_secret(key: str, default: str = None):
+    """Get secret from Streamlit secrets or environment."""
+    try:
+        if hasattr(st, 'secrets') and key in st.secrets:
+            return st.secrets[key]
+    except:
+        pass
+    return os.environ.get(key, default)
+
+
 def check_ollama():
     """Check Ollama availability and get available models."""
     try:
         import httpx
+        with httpx.Client(timeout=3.0) as client:
             resp = client.get(f"{OLLAMA_BASE_URL}/api/tags")
             if resp.status_code == 200:
                 models = [m["name"] for m in resp.json().get("models", [])]
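get_secret checks st.secrets first and falls back to the process environment, so the same code works on Streamlit Cloud (keys in secrets.toml) and locally (exported variables). For example (values are placeholders):

    import os

    os.environ["GROQ_API_KEY"] = "gsk-placeholder"          # placeholder value

    get_secret("GROQ_API_KEY")             # value from .streamlit/secrets.toml if present,
                                           # else "gsk-placeholder" from the environment
    get_secret("MISSING_KEY", "fallback")  # -> "fallback" when neither source defines it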
@@ -41,114 +56,167 @@ def select_model(available_models: list, preferred_models: list) -> str:
     for model in preferred_models:
         if model in available_models:
             return model
-    # Return first preference as fallback
     return preferred_models[0] if preferred_models else "llama3.2:latest"


+def check_cloud_providers():
+    """Check which cloud LLM providers are available."""
+    providers = {}
+
+    if get_secret("GROQ_API_KEY"):
+        providers["groq"] = True
+    if get_secret("GOOGLE_API_KEY"):
+        providers["google"] = True
+    if get_secret("OPENROUTER_API_KEY"):
+        providers["openrouter"] = True
+    if get_secret("HF_TOKEN"):
+        providers["huggingface"] = True
+    if get_secret("GITHUB_TOKEN"):
+        providers["github"] = True
+    if get_secret("MISTRAL_API_KEY"):
+        providers["mistral"] = True
+
+    return providers
+
+
 @st.cache_resource
 def get_unified_rag_system():
     """
     Initialize and return the unified RAG system.

     This is cached at the Streamlit level so all pages share the same instance.
+    Supports both Ollama (local) and cloud providers (Streamlit Cloud).
     """
+    # Check for required dependencies first
     try:
+        import pydantic
+    except ImportError:
+        return {
+            "status": "error",
+            "error": "Required dependency 'pydantic' is not installed.",
+            "rag": None,
+            "store": None,
+            "embedder": None,
+            "mode": "error",
+        }
+
+    # Check Ollama availability
+    ollama_ok, available_models = check_ollama()
+
+    # Check cloud providers
+    cloud_providers = check_cloud_providers()
+
+    if ollama_ok:
+        # Use Ollama for full RAG functionality
         try:
+            from src.rag.agentic import AgenticRAG, RAGConfig
+            from src.rag.store import get_vector_store, VectorStoreConfig, reset_vector_store
+            from src.rag.embeddings import get_embedding_adapter, EmbeddingConfig, reset_embedding_adapter
+
+            # Select models
+            embed_model = select_model(available_models, EMBEDDING_MODELS)
+            llm_model = select_model(available_models, LLM_MODELS)
+
+            # Reset singletons to ensure fresh config
+            reset_vector_store()
+            reset_embedding_adapter()
+
+            # Initialize embedding adapter
+            embed_config = EmbeddingConfig(
+                ollama_model=embed_model,
+                ollama_base_url=OLLAMA_BASE_URL,
+            )
+            embedder = get_embedding_adapter(config=embed_config)
+
+            # Initialize vector store
+            store_config = VectorStoreConfig(
+                persist_directory=VECTOR_STORE_PATH,
+                collection_name=COLLECTION_NAME,
+                similarity_threshold=0.0,
+            )
+            store = get_vector_store(config=store_config)
+
+            # Initialize RAG config
+            rag_config = RAGConfig(
+                model=llm_model,
+                base_url=OLLAMA_BASE_URL,
+                max_revision_attempts=1,
+                enable_query_planning=True,
+                enable_reranking=True,
+                enable_validation=True,
+                retrieval_top_k=10,
+                final_top_k=5,
+                min_confidence=0.3,
+                verbose=False,
+            )
+
+            # Initialize RAG system
+            rag = AgenticRAG(
+                config=rag_config,
+                vector_store=store,
+                embedding_adapter=embedder,
+            )
+
             return {
+                "status": "ready",
+                "error": None,
+                "rag": rag,
+                "store": store,
+                "embedder": embedder,
+                "embed_model": embed_model,
+                "llm_model": llm_model,
+                "available_models": available_models,
+                "mode": "ollama",
             }
+        except Exception as e:
+            import traceback
             return {
                 "status": "error",
+                "error": f"Ollama RAG init failed: {str(e)}",
                 "rag": None,
                 "store": None,
                 "embedder": None,
+                "mode": "error",
             }

+    elif cloud_providers:
+        # Cloud mode - use cloud LLM providers
+        # RAG with vector store requires local processing
+        # but we can still do basic document Q&A with cloud LLMs
         return {
+            "status": "cloud",
             "error": None,
+            "rag": None,
+            "store": None,
+            "embedder": None,
+            "mode": "cloud",
+            "providers": list(cloud_providers.keys()),
+            "message": "Running in cloud mode. Document Q&A available via cloud LLM providers.",
         }

+    else:
+        # No backend available
         return {
+            "status": "demo",
+            "error": "No LLM backend configured. Add API keys to secrets.toml or start Ollama.",
             "rag": None,
             "store": None,
             "embedder": None,
+            "mode": "demo",
         }


 def get_store_stats():
     """Get current vector store statistics."""
     system = get_unified_rag_system()
+
+    if system["mode"] == "cloud":
+        return {
+            "total_chunks": 0,
+            "status": "cloud",
+            "message": "Cloud mode - indexing requires Ollama",
+        }
+
     if system["status"] != "ready":
         return {"total_chunks": 0, "status": "error"}
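Every helper below branches on the dict this function returns, so the contract is: mode "ollama" means rag/store/embedder are live objects, mode "cloud" carries only provider names and a message, and "demo"/"error" carry an error string. A caller therefore looks roughly like this (illustrative only):

    system = get_unified_rag_system()

    if system["mode"] == "ollama":       # status "ready": full agentic RAG
        response = system["rag"].query("What is SPARKNET?")
    elif system["mode"] == "cloud":      # Q&A via cloud LLMs, no vector store
        print("providers:", system["providers"])
    else:                                # "demo" or "error": nothing to query
        print(system["error"])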
@@ -166,8 +234,12 @@ def get_store_stats():
 def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
     """Index a document into the unified RAG system."""
     system = get_unified_rag_system()
+
+    if system["mode"] == "cloud":
+        return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
+
     if system["status"] != "ready":
+        return {"success": False, "error": system.get("error", "RAG not ready"), "num_chunks": 0}

     try:
         num_chunks = system["rag"].index_text(
@@ -183,8 +255,17 @@ def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
 def query_rag(question: str, filters: dict = None):
     """Query the unified RAG system."""
     system = get_unified_rag_system()
+
+    if system["mode"] == "cloud":
+        # Use cloud LLM for Q&A
+        from llm_providers import generate_response
+        response, error = generate_response(question)
+        if error:
+            return None, error
+        return {"answer": response, "sources": [], "mode": "cloud"}, None
+
     if system["status"] != "ready":
+        return None, system.get("error", "RAG not ready")

     try:
         response = system["rag"].query(question, filters=filters)
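Note that llm_providers.generate_response is imported here but its implementation is not part of this commit. For reference, a helper honouring the expected (text, error) contract could be sketched like this against Groq's OpenAI-compatible endpoint; the module body and model id below are our assumptions, not the shipped code:

    # Hypothetical llm_providers.py sketch - NOT the module shipped with this
    # commit, which only shows the import. Returns the (response, error)
    # tuple that query_rag expects.
    import httpx
    from rag_config import get_secret

    def generate_response(question: str) -> tuple:
        api_key = get_secret("GROQ_API_KEY")
        if not api_key:
            return None, "No cloud provider configured"
        try:
            resp = httpx.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "model": "llama-3.1-8b-instant",  # assumed model id
                    "messages": [{"role": "user", "content": question}],
                },
                timeout=30.0,
            )
            resp.raise_for_status()
            return resp.json()["choices"][0]["message"]["content"], None
        except Exception as e:
            return None, str(e)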
@@ -195,7 +276,6 @@ def query_rag(question: str, filters: dict = None):
 def clear_index():
     """Clear the vector store index."""
-    # Force reinitialization by clearing cache
     get_unified_rag_system.clear()
     return True
@@ -207,16 +287,13 @@ def get_indexed_documents() -> list:
         return []

     try:
-        # Query ChromaDB for unique document IDs
         store = system["store"]
         collection = store._collection

-        # Get all metadata to extract unique document_ids
         results = collection.get(include=["metadatas"])
         if not results or not results.get("metadatas"):
             return []

-        doc_ids = set()
         doc_info = {}
         for meta in results["metadatas"]:
             doc_id = meta.get("document_id", "unknown")
@@ -243,7 +320,6 @@ def get_chunks_for_document(document_id: str) -> list:
     store = system["store"]
     collection = store._collection

-    # Query for chunks with this document_id
     results = collection.get(
         where={"document_id": document_id},
         include=["documents", "metadatas"]
@@ -275,15 +351,12 @@ def search_similar_chunks(query: str, top_k: int = 5, doc_filter: str = None):
     embedder = system["embedder"]
     store = system["store"]

-    # Generate query embedding
     query_embedding = embedder.embed_text(query)

-    # Build filter
     filters = None
     if doc_filter:
         filters = {"document_id": doc_filter}

-    # Search
     results = store.search(
         query_embedding=query_embedding,
         top_k=top_k,
@@ -312,7 +385,6 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
         return {"error": "RAG system not ready", "similarity": 0.0}

     try:
-        # Get chunks for both documents
         chunks_1 = get_chunks_for_document(doc_id_1)
         chunks_2 = get_chunks_for_document(doc_id_2)
@@ -321,15 +393,13 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:

         embedder = system["embedder"]

-        # Compute average embeddings for each document
         def avg_embedding(chunks):
             embeddings = []
             for chunk in chunks[:10]:
                 emb = embedder.embed_text(chunk["text"])
                 embeddings.append(emb)
             if not embeddings:
                 return None
-            # Average
             import numpy as np
             return np.mean(embeddings, axis=0).tolist()
@@ -339,7 +409,6 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
         if emb1 is None or emb2 is None:
             return {"error": "Could not compute embeddings", "similarity": 0.0}

-        # Compute cosine similarity
         import numpy as np
         emb1 = np.array(emb1)
         emb2 = np.array(emb2)
@@ -358,11 +427,12 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
 def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata: dict = None):
     """
     Auto-index a processed document with pre-computed chunks.
-
-    This is called after document processing completes to immediately
-    make the document available in RAG.
     """
     system = get_unified_rag_system()
+
+    if system["mode"] == "cloud":
+        return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
+
     if system["status"] != "ready":
         return {"success": False, "error": "RAG system not ready", "num_chunks": 0}
@@ -370,7 +440,6 @@ def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata
     store = system["store"]
     embedder = system["embedder"]

-    # Prepare chunks for indexing
     chunk_dicts = []
     embeddings = []
@@ -392,14 +461,12 @@ def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata
         }
         chunk_dicts.append(chunk_dict)

-        # Generate embedding
         embedding = embedder.embed_text(chunk_text)
         embeddings.append(embedding)

     if not chunk_dicts:
         return {"success": False, "error": "No valid chunks to index", "num_chunks": 0}

-    # Add to store
     store.add_chunks(chunk_dicts, embeddings)

     return {"success": True, "num_chunks": len(chunk_dicts), "error": None}
|
| 3 |
|
| 4 |
This module provides a single source of truth for RAG system configuration,
|
| 5 |
ensuring all demo pages use the same vector store, embeddings, and models.
|
| 6 |
+
|
| 7 |
+
Supports both:
|
| 8 |
+
1. Local Ollama (for on-premise deployments)
|
| 9 |
+
2. Cloud LLM providers (for Streamlit Cloud)
|
| 10 |
"""
|
| 11 |
|
| 12 |
import streamlit as st
|
| 13 |
from pathlib import Path
|
| 14 |
import sys
|
| 15 |
+
import os
|
| 16 |
|
| 17 |
PROJECT_ROOT = Path(__file__).parent.parent
|
| 18 |
sys.path.insert(0, str(PROJECT_ROOT))
|
|
|
|
| 27 |
LLM_MODELS = ["llama3.2:latest", "llama3.1:8b", "mistral:latest", "qwen2.5:14b", "qwen2.5:32b"]
|
| 28 |
|
| 29 |
|
| 30 |
+
def get_secret(key: str, default: str = None):
|
| 31 |
+
"""Get secret from Streamlit secrets or environment."""
|
| 32 |
+
try:
|
| 33 |
+
if hasattr(st, 'secrets') and key in st.secrets:
|
| 34 |
+
return st.secrets[key]
|
| 35 |
+
except:
|
| 36 |
+
pass
|
| 37 |
+
return os.environ.get(key, default)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
def check_ollama():
|
| 41 |
"""Check Ollama availability and get available models."""
|
| 42 |
try:
|
| 43 |
import httpx
|
| 44 |
+
with httpx.Client(timeout=3.0) as client:
|
| 45 |
resp = client.get(f"{OLLAMA_BASE_URL}/api/tags")
|
| 46 |
if resp.status_code == 200:
|
| 47 |
models = [m["name"] for m in resp.json().get("models", [])]
|
|
|
|
| 56 |
for model in preferred_models:
|
| 57 |
if model in available_models:
|
| 58 |
return model
|
|
|
|
| 59 |
return preferred_models[0] if preferred_models else "llama3.2:latest"
|
| 60 |
|
| 61 |
|
| 62 |
+
def check_cloud_providers():
|
| 63 |
+
"""Check which cloud LLM providers are available."""
|
| 64 |
+
providers = {}
|
| 65 |
+
|
| 66 |
+
if get_secret("GROQ_API_KEY"):
|
| 67 |
+
providers["groq"] = True
|
| 68 |
+
if get_secret("GOOGLE_API_KEY"):
|
| 69 |
+
providers["google"] = True
|
| 70 |
+
if get_secret("OPENROUTER_API_KEY"):
|
| 71 |
+
providers["openrouter"] = True
|
| 72 |
+
if get_secret("HF_TOKEN"):
|
| 73 |
+
providers["huggingface"] = True
|
| 74 |
+
if get_secret("GITHUB_TOKEN"):
|
| 75 |
+
providers["github"] = True
|
| 76 |
+
if get_secret("MISTRAL_API_KEY"):
|
| 77 |
+
providers["mistral"] = True
|
| 78 |
+
|
| 79 |
+
return providers
|
| 80 |
+
|
| 81 |
+
|
| 82 |
@st.cache_resource
|
| 83 |
def get_unified_rag_system():
|
| 84 |
"""
|
| 85 |
Initialize and return the unified RAG system.
|
| 86 |
|
| 87 |
This is cached at the Streamlit level so all pages share the same instance.
|
| 88 |
+
Supports both Ollama (local) and cloud providers (Streamlit Cloud).
|
| 89 |
"""
|
| 90 |
+
# Check for required dependencies first
|
| 91 |
try:
|
| 92 |
+
import pydantic
|
| 93 |
+
except ImportError:
|
| 94 |
+
return {
|
| 95 |
+
"status": "error",
|
| 96 |
+
"error": "Required dependency 'pydantic' is not installed.",
|
| 97 |
+
"rag": None,
|
| 98 |
+
"store": None,
|
| 99 |
+
"embedder": None,
|
| 100 |
+
"mode": "error",
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
# Check Ollama availability
|
| 104 |
+
ollama_ok, available_models = check_ollama()
|
| 105 |
+
|
| 106 |
+
# Check cloud providers
|
| 107 |
+
cloud_providers = check_cloud_providers()
|
| 108 |
+
|
| 109 |
+
if ollama_ok:
|
| 110 |
+
# Use Ollama for full RAG functionality
|
| 111 |
try:
|
| 112 |
+
from src.rag.agentic import AgenticRAG, RAGConfig
|
| 113 |
+
from src.rag.store import get_vector_store, VectorStoreConfig, reset_vector_store
|
| 114 |
+
from src.rag.embeddings import get_embedding_adapter, EmbeddingConfig, reset_embedding_adapter
|
| 115 |
+
|
| 116 |
+
# Select models
|
| 117 |
+
embed_model = select_model(available_models, EMBEDDING_MODELS)
|
| 118 |
+
llm_model = select_model(available_models, LLM_MODELS)
|
| 119 |
+
|
| 120 |
+
# Reset singletons to ensure fresh config
|
| 121 |
+
reset_vector_store()
|
| 122 |
+
reset_embedding_adapter()
|
| 123 |
+
|
| 124 |
+
# Initialize embedding adapter
|
| 125 |
+
embed_config = EmbeddingConfig(
|
| 126 |
+
ollama_model=embed_model,
|
| 127 |
+
ollama_base_url=OLLAMA_BASE_URL,
|
| 128 |
+
)
|
| 129 |
+
embedder = get_embedding_adapter(config=embed_config)
|
| 130 |
+
|
| 131 |
+
# Initialize vector store
|
| 132 |
+
store_config = VectorStoreConfig(
|
| 133 |
+
persist_directory=VECTOR_STORE_PATH,
|
| 134 |
+
collection_name=COLLECTION_NAME,
|
| 135 |
+
similarity_threshold=0.0,
|
| 136 |
+
)
|
| 137 |
+
store = get_vector_store(config=store_config)
|
| 138 |
+
|
| 139 |
+
# Initialize RAG config
|
| 140 |
+
rag_config = RAGConfig(
|
| 141 |
+
model=llm_model,
|
| 142 |
+
base_url=OLLAMA_BASE_URL,
|
| 143 |
+
max_revision_attempts=1,
|
| 144 |
+
enable_query_planning=True,
|
| 145 |
+
enable_reranking=True,
|
| 146 |
+
enable_validation=True,
|
| 147 |
+
retrieval_top_k=10,
|
| 148 |
+
final_top_k=5,
|
| 149 |
+
min_confidence=0.3,
|
| 150 |
+
verbose=False,
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
# Initialize RAG system
|
| 154 |
+
rag = AgenticRAG(
|
| 155 |
+
config=rag_config,
|
| 156 |
+
vector_store=store,
|
| 157 |
+
embedding_adapter=embedder,
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
return {
|
| 161 |
+
"status": "ready",
|
| 162 |
+
"error": None,
|
| 163 |
+
"rag": rag,
|
| 164 |
+
"store": store,
|
| 165 |
+
"embedder": embedder,
|
| 166 |
+
"embed_model": embed_model,
|
| 167 |
+
"llm_model": llm_model,
|
| 168 |
+
"available_models": available_models,
|
| 169 |
+
"mode": "ollama",
|
| 170 |
}
|
| 171 |
+
except Exception as e:
|
| 172 |
+
import traceback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
return {
|
| 174 |
"status": "error",
|
| 175 |
+
"error": f"Ollama RAG init failed: {str(e)}",
|
| 176 |
"rag": None,
|
| 177 |
"store": None,
|
| 178 |
"embedder": None,
|
| 179 |
+
"mode": "error",
|
| 180 |
}
|
| 181 |
|
| 182 |
+
elif cloud_providers:
|
| 183 |
+
# Cloud mode - use cloud LLM providers
|
| 184 |
+
# RAG with vector store requires local processing
|
| 185 |
+
# but we can still do basic document Q&A with cloud LLMs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
return {
|
| 187 |
+
"status": "cloud",
|
| 188 |
"error": None,
|
| 189 |
+
"rag": None,
|
| 190 |
+
"store": None,
|
| 191 |
+
"embedder": None,
|
| 192 |
+
"mode": "cloud",
|
| 193 |
+
"providers": list(cloud_providers.keys()),
|
| 194 |
+
"message": "Running in cloud mode. Document Q&A available via cloud LLM providers.",
|
| 195 |
}
|
| 196 |
|
| 197 |
+
else:
|
| 198 |
+
# No backend available
|
| 199 |
return {
|
| 200 |
+
"status": "demo",
|
| 201 |
+
"error": "No LLM backend configured. Add API keys to secrets.toml or start Ollama.",
|
| 202 |
"rag": None,
|
| 203 |
"store": None,
|
| 204 |
"embedder": None,
|
| 205 |
+
"mode": "demo",
|
| 206 |
}
|
| 207 |
|
| 208 |
|
| 209 |
def get_store_stats():
|
| 210 |
"""Get current vector store statistics."""
|
| 211 |
system = get_unified_rag_system()
|
| 212 |
+
|
| 213 |
+
if system["mode"] == "cloud":
|
| 214 |
+
return {
|
| 215 |
+
"total_chunks": 0,
|
| 216 |
+
"status": "cloud",
|
| 217 |
+
"message": "Cloud mode - indexing requires Ollama",
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
if system["status"] != "ready":
|
| 221 |
return {"total_chunks": 0, "status": "error"}
|
| 222 |
|
|
|
|
| 234 |
def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
|
| 235 |
"""Index a document into the unified RAG system."""
|
| 236 |
system = get_unified_rag_system()
|
| 237 |
+
|
| 238 |
+
if system["mode"] == "cloud":
|
| 239 |
+
return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
|
| 240 |
+
|
| 241 |
if system["status"] != "ready":
|
| 242 |
+
return {"success": False, "error": system.get("error", "RAG not ready"), "num_chunks": 0}
|
| 243 |
|
| 244 |
try:
|
| 245 |
num_chunks = system["rag"].index_text(
|
|
|
|
| 255 |
def query_rag(question: str, filters: dict = None):
|
| 256 |
"""Query the unified RAG system."""
|
| 257 |
system = get_unified_rag_system()
|
| 258 |
+
|
| 259 |
+
if system["mode"] == "cloud":
|
| 260 |
+
# Use cloud LLM for Q&A
|
| 261 |
+
from llm_providers import generate_response
|
| 262 |
+
response, error = generate_response(question)
|
| 263 |
+
if error:
|
| 264 |
+
return None, error
|
| 265 |
+
return {"answer": response, "sources": [], "mode": "cloud"}, None
|
| 266 |
+
|
| 267 |
if system["status"] != "ready":
|
| 268 |
+
return None, system.get("error", "RAG not ready")
|
| 269 |
|
| 270 |
try:
|
| 271 |
response = system["rag"].query(question, filters=filters)
|
|
|
|
| 276 |
|
| 277 |
def clear_index():
|
| 278 |
"""Clear the vector store index."""
|
|
|
|
| 279 |
get_unified_rag_system.clear()
|
| 280 |
return True
|
| 281 |
|
|
|
|
| 287 |
return []
|
| 288 |
|
| 289 |
try:
|
|
|
|
| 290 |
store = system["store"]
|
| 291 |
collection = store._collection
|
| 292 |
|
|
|
|
| 293 |
results = collection.get(include=["metadatas"])
|
| 294 |
if not results or not results.get("metadatas"):
|
| 295 |
return []
|
| 296 |
|
|
|
|
| 297 |
doc_info = {}
|
| 298 |
for meta in results["metadatas"]:
|
| 299 |
doc_id = meta.get("document_id", "unknown")
|
|
|
|
| 320 |
store = system["store"]
|
| 321 |
collection = store._collection
|
| 322 |
|
|
|
|
| 323 |
results = collection.get(
|
| 324 |
where={"document_id": document_id},
|
| 325 |
include=["documents", "metadatas"]
|
|
|
|
| 351 |
embedder = system["embedder"]
|
| 352 |
store = system["store"]
|
| 353 |
|
|
|
|
| 354 |
query_embedding = embedder.embed_text(query)
|
| 355 |
|
|
|
|
| 356 |
filters = None
|
| 357 |
if doc_filter:
|
| 358 |
filters = {"document_id": doc_filter}
|
| 359 |
|
|
|
|
| 360 |
results = store.search(
|
| 361 |
query_embedding=query_embedding,
|
| 362 |
top_k=top_k,
|
|
|
|
| 385 |
return {"error": "RAG system not ready", "similarity": 0.0}
|
| 386 |
|
| 387 |
try:
|
|
|
|
| 388 |
chunks_1 = get_chunks_for_document(doc_id_1)
|
| 389 |
chunks_2 = get_chunks_for_document(doc_id_2)
|
| 390 |
|
|
|
|
| 393 |
|
| 394 |
embedder = system["embedder"]
|
| 395 |
|
|
|
|
| 396 |
def avg_embedding(chunks):
|
| 397 |
embeddings = []
|
| 398 |
+
for chunk in chunks[:10]:
|
| 399 |
emb = embedder.embed_text(chunk["text"])
|
| 400 |
embeddings.append(emb)
|
| 401 |
if not embeddings:
|
| 402 |
return None
|
|
|
|
| 403 |
import numpy as np
|
| 404 |
return np.mean(embeddings, axis=0).tolist()
|
| 405 |
|
|
|
|
| 409 |
if emb1 is None or emb2 is None:
|
| 410 |
return {"error": "Could not compute embeddings", "similarity": 0.0}
|
| 411 |
|
|
|
|
| 412 |
import numpy as np
|
| 413 |
emb1 = np.array(emb1)
|
| 414 |
emb2 = np.array(emb2)
|
|
|
|
| 427 |
def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata: dict = None):
|
| 428 |
"""
|
| 429 |
Auto-index a processed document with pre-computed chunks.
|
|
|
|
|
|
|
|
|
|
| 430 |
"""
|
| 431 |
system = get_unified_rag_system()
|
| 432 |
+
|
| 433 |
+
if system["mode"] == "cloud":
|
| 434 |
+
return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
|
| 435 |
+
|
| 436 |
if system["status"] != "ready":
|
| 437 |
return {"success": False, "error": "RAG system not ready", "num_chunks": 0}
|
| 438 |
|
|
|
|
| 440 |
store = system["store"]
|
| 441 |
embedder = system["embedder"]
|
| 442 |
|
|
|
|
| 443 |
chunk_dicts = []
|
| 444 |
embeddings = []
|
| 445 |
|
|
|
|
| 461 |
}
|
| 462 |
chunk_dicts.append(chunk_dict)
|
| 463 |
|
|
|
|
| 464 |
embedding = embedder.embed_text(chunk_text)
|
| 465 |
embeddings.append(embedding)
|
| 466 |
|
| 467 |
if not chunk_dicts:
|
| 468 |
return {"success": False, "error": "No valid chunks to index", "num_chunks": 0}
|
| 469 |
|
|
|
|
| 470 |
store.add_chunks(chunk_dicts, embeddings)
|
| 471 |
|
| 472 |
return {"success": True, "num_chunks": len(chunk_dicts), "error": None}
|
demo/requirements.txt
CHANGED
@@ -1,19 +1,76 @@
-# SPARKNET Demo Requirements
+# SPARKNET Demo Requirements for Streamlit Cloud
+# This file is used by Streamlit Cloud for deployment

+# ==============================================================================
+# Streamlit Web Framework
+# ==============================================================================
 streamlit>=1.28.0

+# ==============================================================================
+# Data Validation & Configuration (REQUIRED)
+# ==============================================================================
+pydantic>=2.0.0
+pydantic-settings>=2.0.0
+pyyaml>=6.0
+python-dotenv>=1.0.0
+typing-extensions>=4.0.0
+
+# ==============================================================================
+# LLM Orchestration (LangChain Ecosystem)
+# ==============================================================================
+langchain>=0.1.0
+langchain-community>=0.0.20
+langchain-ollama>=0.0.1
+langgraph>=0.0.20
+ollama>=0.1.0
+
+# ==============================================================================
+# Vector Stores & Embeddings
+# ==============================================================================
+chromadb>=0.4.0
+faiss-cpu>=1.7.4
+sentence-transformers>=2.2.0
+
+# ==============================================================================
+# PDF & Document Processing
+# ==============================================================================
+pymupdf>=1.23.0
+reportlab>=4.0.0
+
+# ==============================================================================
+# Observability & Logging
+# ==============================================================================
+loguru>=0.7.0
+rich>=13.0.0

+# ==============================================================================
+# Web & HTTP
+# ==============================================================================
+requests>=2.31.0
 httpx>=0.25.0
+beautifulsoup4>=4.12.0

+# ==============================================================================
+# Data Handling
+# ==============================================================================
+pandas>=2.0.0
+numpy>=1.24.0

+# ==============================================================================
+# Image & Charts
+# ==============================================================================
+Pillow>=10.0.0
 plotly>=5.18.0
 altair>=5.2.0
+
+# ==============================================================================
+# System & Caching
+# ==============================================================================
+psutil>=5.9.0
+cachetools>=5.3.0
+tenacity>=8.2.0
+
+# ==============================================================================
+# Workflow
+# ==============================================================================
+networkx>=3.0
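Since Streamlit Cloud installs this file at build time and fails late on missing wheels, a quick local import check in a fresh virtualenv can save a deploy cycle. A small convenience snippet (ours, not part of the commit):

    import importlib

    for mod in ["streamlit", "pydantic", "langchain", "chromadb",
                "sentence_transformers", "fitz", "pandas", "numpy"]:
        try:
            importlib.import_module(mod)  # "fitz" is pymupdf's import name
            print(f"ok   {mod}")
        except ImportError as e:
            print(f"FAIL {mod}: {e}")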
demo/state_manager.py
CHANGED
@@ -654,14 +654,18 @@ def render_global_status_bar():

     # Import RAG config for additional status
     try:
-        from rag_config import get_unified_rag_system, check_ollama
+        from rag_config import get_unified_rag_system, check_ollama, check_cloud_providers
         rag_system = get_unified_rag_system()
         ollama_ok, models = check_ollama()
+        cloud_providers = check_cloud_providers()
         rag_status = rag_system["status"]
+        rag_mode = rag_system.get("mode", "error")
         llm_model = rag_system.get("llm_model", "N/A")
     except:
         ollama_ok = False
+        cloud_providers = {}
         rag_status = "error"
+        rag_mode = "error"
         llm_model = "N/A"
         models = []

@@ -671,17 +675,29 @@ def render_global_status_bar():
     with cols[0]:
         if ollama_ok:
             st.success(f"Ollama ({len(models)})")
+        elif cloud_providers:
+            st.info(f"Cloud ({len(cloud_providers)})")
         else:
+            st.warning("Demo Mode")

     with cols[1]:
         if rag_status == "ready":
             st.success("RAG Ready")
+        elif rag_mode == "cloud":
+            st.info("Cloud LLM")
+        elif rag_mode == "demo":
+            st.warning("Demo Mode")
         else:
             st.error("RAG Error")

     with cols[2]:
+        if rag_mode == "cloud" and cloud_providers:
+            provider_name = list(cloud_providers.keys())[0].title()
+            st.info(f"{provider_name}")
+        elif llm_model != "N/A":
+            st.info(f"{llm_model.split(':')[0]}")
+        else:
+            st.info("Offline")

     with cols[3]:
         st.info(f"{summary['total_documents']} Docs")
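One detail worth noting: the status bar, both demo pages, and the rag_config helpers all call get_unified_rag_system(), and @st.cache_resource guarantees they share a single instance per server process; clear_index() calls .clear() on it to force a re-probe of Ollama and the cloud keys. A minimal sketch of that behaviour (st.cache_resource and .clear() are the real Streamlit APIs used above; the toy function is ours):

    import streamlit as st

    @st.cache_resource
    def expensive_backend():
        print("initialising...")      # runs once per process
        return {"mode": "ollama"}

    a = expensive_backend()
    b = expensive_backend()           # same cached object, no re-init
    assert a is b

    expensive_backend.clear()         # what clear_index() does
    c = expensive_backend()           # re-probes backends on the next call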