MHamdan committed on
Commit
a2fb804
·
1 Parent(s): 667e85c

Fix Streamlit Cloud deployment - add cloud mode support

Browse files
demo/pages/1_🔬_Live_Processing.py CHANGED
@@ -345,23 +345,43 @@ with col_status:
345
 
346
  ollama_ok, models = check_ollama()
347
  rag_system = get_unified_rag_system()
 
 
 
 
 
 
 
 
348
 
349
  status_cols = st.columns(2)
350
  with status_cols[0]:
351
  if ollama_ok:
352
  st.success(f"Ollama ({len(models)})")
 
 
353
  else:
354
- st.error("Ollama Offline")
355
  with status_cols[1]:
356
  if rag_system["status"] == "ready":
357
  st.success("RAG Ready")
 
 
 
 
358
  else:
359
  st.error("RAG Error")
360
 
361
  # State summary
362
  summary = state_manager.get_summary()
363
  st.metric("Processed Docs", summary["total_documents"])
364
- st.metric("Indexed Chunks", summary["total_indexed_chunks"])
 
 
 
 
 
 
365
 
366
  st.markdown("---")
367
 
@@ -503,22 +523,29 @@ if file_to_process and st.button("🚀 Start Processing", type="primary", use_co
503
 
504
  # Stage 5: RAG Indexing
505
  indexed_count = 0
506
- if auto_index and rag_system["status"] == "ready" and chunks:
507
- status_text.markdown("**📚 Indexing to RAG...**")
508
- state_manager.update_processing(doc_id, "indexing", 0.9, "Indexing to RAG...")
509
- progress_bar.progress(90)
510
-
511
- # Auto-index
512
- index_result = auto_index_processed_document(
513
- doc_id=doc_id,
514
- text=processing_result.get("raw_text", ""),
515
- chunks=chunks,
516
- metadata={"filename": filename, "source": file_to_process}
517
- )
518
-
519
- if index_result["success"]:
520
- indexed_count = index_result["num_chunks"]
521
- state_manager.mark_indexed(doc_id, indexed_count)
 
 
 
 
 
 
 
522
 
523
  # Complete
524
  progress_bar.progress(100)
 
345
 
346
  ollama_ok, models = check_ollama()
347
  rag_system = get_unified_rag_system()
348
+ rag_mode = rag_system.get("mode", "error")
349
+
350
+ # Check for cloud providers
351
+ try:
352
+ from rag_config import check_cloud_providers
353
+ cloud_providers = check_cloud_providers()
354
+ except:
355
+ cloud_providers = {}
356
 
357
  status_cols = st.columns(2)
358
  with status_cols[0]:
359
  if ollama_ok:
360
  st.success(f"Ollama ({len(models)})")
361
+ elif cloud_providers:
362
+ st.info(f"Cloud ({len(cloud_providers)})")
363
  else:
364
+ st.warning("Demo Mode")
365
  with status_cols[1]:
366
  if rag_system["status"] == "ready":
367
  st.success("RAG Ready")
368
+ elif rag_mode == "cloud":
369
+ st.info("Cloud LLM")
370
+ elif rag_mode == "demo":
371
+ st.warning("Demo Mode")
372
  else:
373
  st.error("RAG Error")
374
 
375
  # State summary
376
  summary = state_manager.get_summary()
377
  st.metric("Processed Docs", summary["total_documents"])
378
+
379
+ # Show different metrics based on mode
380
+ if rag_mode == "cloud":
381
+ st.metric("Cloud Providers", len(cloud_providers))
382
+ st.caption("RAG indexing requires Ollama")
383
+ else:
384
+ st.metric("Indexed Chunks", summary["total_indexed_chunks"])
385
 
386
  st.markdown("---")
387
 
 
523
 
524
  # Stage 5: RAG Indexing
525
  indexed_count = 0
526
+ if auto_index and chunks:
527
+ if rag_system["status"] == "ready":
528
+ status_text.markdown("**📚 Indexing to RAG...**")
529
+ state_manager.update_processing(doc_id, "indexing", 0.9, "Indexing to RAG...")
530
+ progress_bar.progress(90)
531
+
532
+ # Auto-index
533
+ index_result = auto_index_processed_document(
534
+ doc_id=doc_id,
535
+ text=processing_result.get("raw_text", ""),
536
+ chunks=chunks,
537
+ metadata={"filename": filename, "source": file_to_process}
538
+ )
539
+
540
+ if index_result["success"]:
541
+ indexed_count = index_result["num_chunks"]
542
+ state_manager.mark_indexed(doc_id, indexed_count)
543
+ elif rag_mode == "cloud":
544
+ status_text.markdown("**☁️ Cloud mode - skipping RAG indexing...**")
545
+ state_manager.update_processing(doc_id, "indexing", 0.9, "Cloud mode - no indexing")
546
+ progress_bar.progress(90)
547
+ # In cloud mode, document is processed but not indexed
548
+ # Users can still query documents via cloud LLM
549
 
550
  # Complete
551
  progress_bar.progress(100)
demo/pages/2_💬_Interactive_RAG.py CHANGED
@@ -354,6 +354,14 @@ rag_system = get_unified_rag_system()
354
  ollama_ok, models = check_ollama()
355
  stats = get_store_stats()
356
  indexed_docs = get_indexed_documents()
 
 
 
 
 
 
 
 
357
 
358
  # Session state
359
  if "messages" not in st.session_state:
@@ -393,25 +401,45 @@ cols = st.columns(5)
393
  with cols[0]:
394
  if ollama_ok:
395
  st.success(f"Ollama ({len(models)})")
 
 
396
  else:
397
- st.error("Ollama Offline")
398
  with cols[1]:
399
  if rag_system["status"] == "ready":
400
  st.success("RAG Ready")
 
 
 
 
401
  else:
402
  st.error("RAG Error")
403
  with cols[2]:
404
- st.info(f"{rag_system.get('llm_model', 'N/A').split(':')[0]}")
 
 
 
 
405
  with cols[3]:
406
  chunk_count = stats.get('total_chunks', 0)
407
  if chunk_count > 0:
408
  st.success(f"{chunk_count} Chunks")
 
 
409
  else:
410
  st.warning("0 Chunks")
411
  with cols[4]:
412
- st.info(f"{rag_system.get('embed_model', 'N/A').split(':')[0]}")
413
-
414
- if rag_system["status"] == "error":
 
 
 
 
 
 
 
 
415
  with st.expander("RAG Error Details"):
416
  st.code(rag_system["error"])
417
 
@@ -616,8 +644,9 @@ else:
616
  st.markdown(pending_query)
617
 
618
  with st.chat_message("assistant"):
619
- if rag_system["status"] != "ready":
620
- st.error("RAG system not ready")
 
621
  st.session_state.messages.append({"role": "assistant", "content": "RAG system not ready"})
622
  else:
623
  # Show progress
@@ -645,49 +674,64 @@ else:
645
  st.error(f"Error: {error}")
646
  st.session_state.messages.append({"role": "assistant", "content": f"Error: {error}"})
647
  elif response:
648
- # Display answer
649
- st.markdown(response.answer)
650
-
651
- # Build metadata
652
- metadata = {
653
- "latency_ms": response.latency_ms,
654
- "num_sources": response.num_sources,
655
- "confidence": response.confidence,
656
- "validated": response.validated,
657
- }
658
-
659
- # Display metrics
660
- if show_metrics:
661
- m_cols = st.columns(4)
662
- with m_cols[0]:
663
- st.markdown(f'<div class="metric-box"><div class="metric-value">{metadata.get("latency_ms", 0):.0f}ms</div><div class="metric-label">Latency</div></div>', unsafe_allow_html=True)
664
- with m_cols[1]:
665
- st.markdown(f'<div class="metric-box"><div class="metric-value">{metadata.get("num_sources", 0)}</div><div class="metric-label">Sources</div></div>', unsafe_allow_html=True)
666
- with m_cols[2]:
667
- conf = metadata.get("confidence", 0)
668
- color = "#4ECDC4" if conf > 0.6 else "#ffc107" if conf > 0.3 else "#dc3545"
669
- st.markdown(f'<div class="metric-box"><div class="metric-value" style="color:{color}">{conf:.0%}</div><div class="metric-label">Confidence</div></div>', unsafe_allow_html=True)
670
- with m_cols[3]:
671
- val = "" if metadata.get("validated") else "?"
672
- st.markdown(f'<div class="metric-box"><div class="metric-value">{val}</div><div class="metric-label">Validated</div></div>', unsafe_allow_html=True)
673
-
674
- # Build citations list
675
- citations = []
676
- if hasattr(response, 'citations') and response.citations:
677
- for i, cite in enumerate(response.citations):
678
- citations.append({
679
- "index": i + 1,
680
- "text_snippet": cite.text_snippet if hasattr(cite, 'text_snippet') else str(cite),
681
- "relevance_score": cite.relevance_score if hasattr(cite, 'relevance_score') else 0.0,
682
- })
683
-
684
- # Store message with metadata
685
- st.session_state.messages.append({
686
- "role": "assistant",
687
- "content": response.answer,
688
- "metadata": metadata,
689
- "citations": citations,
690
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
  else:
692
  st.warning("No response from RAG system")
693
  st.session_state.messages.append({"role": "assistant", "content": "No response from RAG system"})
@@ -701,18 +745,23 @@ else:
701
  st.markdown(prompt)
702
 
703
  with st.chat_message("assistant"):
704
- if rag_system["status"] != "ready":
705
- st.error("RAG system not ready")
 
706
  st.session_state.messages.append({"role": "assistant", "content": "RAG system not ready"})
707
  else:
708
- # Show progress
709
  progress = st.progress(0)
710
  status = st.empty()
711
 
712
- stages = ["Planning", "Retrieving", "Reranking", "Generating", "Validating"]
 
 
 
 
713
  for i, stage in enumerate(stages):
714
  status.markdown(f"**{stage}...**")
715
- progress.progress((i + 1) * 20)
716
  time.sleep(0.15)
717
 
718
  # Build filters for document
@@ -730,79 +779,94 @@ else:
730
  st.error(f"Error: {error}")
731
  st.session_state.messages.append({"role": "assistant", "content": f"Error: {error}"})
732
  elif response:
733
- # Display answer
734
- st.markdown(response.answer)
735
-
736
- # Build metadata
737
- metadata = {
738
- "latency_ms": response.latency_ms,
739
- "num_sources": response.num_sources,
740
- "confidence": response.confidence,
741
- "validated": response.validated,
742
- }
743
-
744
- # Display metrics
745
- if show_metrics:
746
- m_cols = st.columns(4)
747
- with m_cols[0]:
748
- st.markdown(f'<div class="metric-box"><div class="metric-value">{response.latency_ms:.0f}ms</div><div class="metric-label">Latency</div></div>', unsafe_allow_html=True)
749
- with m_cols[1]:
750
- st.markdown(f'<div class="metric-box"><div class="metric-value">{response.num_sources}</div><div class="metric-label">Sources</div></div>', unsafe_allow_html=True)
751
- with m_cols[2]:
752
- conf_color = "#4ECDC4" if response.confidence > 0.6 else "#ffc107" if response.confidence > 0.3 else "#dc3545"
753
- st.markdown(f'<div class="metric-box"><div class="metric-value" style="color:{conf_color}">{response.confidence:.0%}</div><div class="metric-label">Confidence</div></div>', unsafe_allow_html=True)
754
- with m_cols[3]:
755
- val_icon = "" if response.validated else "?"
756
- st.markdown(f'<div class="metric-box"><div class="metric-value">{val_icon}</div><div class="metric-label">Validated</div></div>', unsafe_allow_html=True)
757
-
758
- # Display sources
759
- citations = []
760
- if show_sources and response.citations:
761
- with st.expander(f"Sources ({len(response.citations)})"):
762
- for i, cite in enumerate(response.citations):
763
- color = get_chunk_color(i)
764
- citations.append({
765
- "index": cite.index,
766
- "relevance_score": cite.relevance_score,
767
- "text_snippet": cite.text_snippet,
768
- })
769
- st.markdown(f"""
770
- <div class="source-card" style="border-left-color: {color};">
771
- <div class="source-header">
772
- <strong>[{cite.index}]</strong> • Relevance: {cite.relevance_score:.0%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773
  </div>
774
- <div class="source-text">{cite.text_snippet[:300]}...</div>
775
- </div>
776
- """, unsafe_allow_html=True)
777
-
778
- # Chunk preview (semantic search)
779
- if show_chunk_preview:
780
- with st.expander("Chunk Preview (Top Matches)"):
781
- chunks = search_similar_chunks(
782
- prompt,
783
- top_k=5,
784
- doc_filter=st.session_state.doc_filter
785
- )
786
- for i, chunk in enumerate(chunks):
787
- sim = chunk.get("similarity", 0)
788
- color = "#4ECDC4" if sim > 0.7 else "#ffc107" if sim > 0.5 else "#8b949e"
789
- st.markdown(f"""
790
- <div class="chunk-preview" style="border-left: 3px solid {color};">
791
- <div style="font-size: 10px; color: #8b949e;">
792
- Similarity: <span style="color: {color};">{sim:.0%}</span> |
793
- Doc: {chunk.get('document_id', 'N/A')[:15]}...
794
  </div>
795
- <div style="margin-top: 4px;">{chunk.get('text', '')[:200]}...</div>
796
- </div>
797
- """, unsafe_allow_html=True)
798
 
799
- # Save to history
800
- st.session_state.messages.append({
801
- "role": "assistant",
802
- "content": response.answer,
803
- "citations": citations,
804
- "metadata": metadata,
805
- })
806
 
807
  # Dynamic suggested questions based on document content
808
  st.markdown("---")
@@ -817,12 +881,14 @@ dynamic_questions = generate_dynamic_questions(state_manager, indexed_docs, max_
817
 
818
  # Display as clickable buttons
819
  sample_cols = st.columns(len(dynamic_questions))
 
 
820
  for i, q in enumerate(dynamic_questions):
821
  with sample_cols[i]:
822
  # Truncate long questions for button display
823
  display_q = q if len(q) <= 35 else q[:32] + "..."
824
  if st.button(display_q, key=f"sample_{i}", use_container_width=True,
825
- disabled=(stats.get('total_chunks', 0) == 0),
826
  help=q if len(q) > 35 else None):
827
  st.session_state.messages.append({"role": "user", "content": q})
828
  st.rerun()
@@ -830,6 +896,8 @@ for i, q in enumerate(dynamic_questions):
830
  # Show hint about dynamic questions
831
  if stats.get('total_chunks', 0) > 0:
832
  st.caption("📌 Questions are generated based on your indexed documents")
 
 
833
 
834
  # Architecture info
835
  with st.expander("Multi-Agent RAG Architecture"):
 
354
  ollama_ok, models = check_ollama()
355
  stats = get_store_stats()
356
  indexed_docs = get_indexed_documents()
357
+ rag_mode = rag_system.get("mode", "error")
358
+
359
+ # Check for cloud providers
360
+ try:
361
+ from rag_config import check_cloud_providers
362
+ cloud_providers = check_cloud_providers()
363
+ except:
364
+ cloud_providers = {}
365
 
366
  # Session state
367
  if "messages" not in st.session_state:
 
401
  with cols[0]:
402
  if ollama_ok:
403
  st.success(f"Ollama ({len(models)})")
404
+ elif cloud_providers:
405
+ st.info(f"Cloud ({len(cloud_providers)})")
406
  else:
407
+ st.warning("Demo Mode")
408
  with cols[1]:
409
  if rag_system["status"] == "ready":
410
  st.success("RAG Ready")
411
+ elif rag_mode == "cloud":
412
+ st.info("Cloud LLM")
413
+ elif rag_mode == "demo":
414
+ st.warning("Demo Mode")
415
  else:
416
  st.error("RAG Error")
417
  with cols[2]:
418
+ if rag_mode == "cloud" and cloud_providers:
419
+ provider_name = list(cloud_providers.keys())[0].title()
420
+ st.info(f"{provider_name}")
421
+ else:
422
+ st.info(f"{rag_system.get('llm_model', 'N/A').split(':')[0]}")
423
  with cols[3]:
424
  chunk_count = stats.get('total_chunks', 0)
425
  if chunk_count > 0:
426
  st.success(f"{chunk_count} Chunks")
427
+ elif rag_mode == "cloud":
428
+ st.info("Cloud Q&A")
429
  else:
430
  st.warning("0 Chunks")
431
  with cols[4]:
432
+ if rag_mode == "cloud":
433
+ st.info("Cloud Embed")
434
+ else:
435
+ st.info(f"{rag_system.get('embed_model', 'N/A').split(':')[0]}")
436
+
437
+ # Show cloud mode message
438
+ if rag_mode == "cloud":
439
+ st.info("☁️ **Cloud Mode**: Using cloud LLM providers for Q&A. Document indexing requires Ollama.")
440
+ elif rag_mode == "demo":
441
+ st.warning("📋 **Demo Mode**: Add API keys to secrets.toml or start Ollama for full functionality.")
442
+ elif rag_system["status"] == "error":
443
  with st.expander("RAG Error Details"):
444
  st.code(rag_system["error"])
445
 
 
644
  st.markdown(pending_query)
645
 
646
  with st.chat_message("assistant"):
647
+ # Allow queries in both "ready" (Ollama) and "cloud" modes
648
+ if rag_system["status"] not in ["ready", "cloud"]:
649
+ st.error("RAG system not ready. Please configure Ollama or add cloud API keys.")
650
  st.session_state.messages.append({"role": "assistant", "content": "RAG system not ready"})
651
  else:
652
  # Show progress
 
674
  st.error(f"Error: {error}")
675
  st.session_state.messages.append({"role": "assistant", "content": f"Error: {error}"})
676
  elif response:
677
+ # Handle both RAG response objects and cloud mode dict responses
678
+ is_cloud_response = isinstance(response, dict) and response.get("mode") == "cloud"
679
+
680
+ if is_cloud_response:
681
+ answer = response.get("answer", "")
682
+ st.markdown(answer)
683
+ st.info("☁️ *Response from cloud LLM (no document retrieval)*")
684
+
685
+ st.session_state.messages.append({
686
+ "role": "assistant",
687
+ "content": answer,
688
+ "metadata": {"mode": "cloud"},
689
+ "citations": [],
690
+ })
691
+ else:
692
+ # Display answer
693
+ st.markdown(response.answer)
694
+
695
+ # Build metadata
696
+ metadata = {
697
+ "latency_ms": response.latency_ms,
698
+ "num_sources": response.num_sources,
699
+ "confidence": response.confidence,
700
+ "validated": response.validated,
701
+ }
702
+
703
+ # Display metrics
704
+ if show_metrics:
705
+ m_cols = st.columns(4)
706
+ with m_cols[0]:
707
+ st.markdown(f'<div class="metric-box"><div class="metric-value">{metadata.get("latency_ms", 0):.0f}ms</div><div class="metric-label">Latency</div></div>', unsafe_allow_html=True)
708
+ with m_cols[1]:
709
+ st.markdown(f'<div class="metric-box"><div class="metric-value">{metadata.get("num_sources", 0)}</div><div class="metric-label">Sources</div></div>', unsafe_allow_html=True)
710
+ with m_cols[2]:
711
+ conf = metadata.get("confidence", 0)
712
+ color = "#4ECDC4" if conf > 0.6 else "#ffc107" if conf > 0.3 else "#dc3545"
713
+ st.markdown(f'<div class="metric-box"><div class="metric-value" style="color:{color}">{conf:.0%}</div><div class="metric-label">Confidence</div></div>', unsafe_allow_html=True)
714
+ with m_cols[3]:
715
+ val = "" if metadata.get("validated") else "?"
716
+ st.markdown(f'<div class="metric-box"><div class="metric-value">{val}</div><div class="metric-label">Validated</div></div>', unsafe_allow_html=True)
717
+
718
+ # Build citations list
719
+ citations = []
720
+ if hasattr(response, 'citations') and response.citations:
721
+ for i, cite in enumerate(response.citations):
722
+ citations.append({
723
+ "index": i + 1,
724
+ "text_snippet": cite.text_snippet if hasattr(cite, 'text_snippet') else str(cite),
725
+ "relevance_score": cite.relevance_score if hasattr(cite, 'relevance_score') else 0.0,
726
+ })
727
+
728
+ # Store message with metadata
729
+ st.session_state.messages.append({
730
+ "role": "assistant",
731
+ "content": response.answer,
732
+ "metadata": metadata,
733
+ "citations": citations,
734
+ })
735
  else:
736
  st.warning("No response from RAG system")
737
  st.session_state.messages.append({"role": "assistant", "content": "No response from RAG system"})
 
745
  st.markdown(prompt)
746
 
747
  with st.chat_message("assistant"):
748
+ # Allow queries in both "ready" (Ollama) and "cloud" modes
749
+ if rag_system["status"] not in ["ready", "cloud"]:
750
+ st.error("RAG system not ready. Please configure Ollama or add cloud API keys.")
751
  st.session_state.messages.append({"role": "assistant", "content": "RAG system not ready"})
752
  else:
753
+ # Show progress - different stages for cloud vs RAG mode
754
  progress = st.progress(0)
755
  status = st.empty()
756
 
757
+ if rag_mode == "cloud":
758
+ stages = ["Processing", "Generating"]
759
+ else:
760
+ stages = ["Planning", "Retrieving", "Reranking", "Generating", "Validating"]
761
+
762
  for i, stage in enumerate(stages):
763
  status.markdown(f"**{stage}...**")
764
+ progress.progress(int((i + 1) * 100 / len(stages)))
765
  time.sleep(0.15)
766
 
767
  # Build filters for document
 
779
  st.error(f"Error: {error}")
780
  st.session_state.messages.append({"role": "assistant", "content": f"Error: {error}"})
781
  elif response:
782
+ # Handle both RAG response objects and cloud mode dict responses
783
+ is_cloud_response = isinstance(response, dict) and response.get("mode") == "cloud"
784
+
785
+ if is_cloud_response:
786
+ answer = response.get("answer", "")
787
+ st.markdown(answer)
788
+ st.info("☁️ *Response from cloud LLM (no document retrieval)*")
789
+
790
+ st.session_state.messages.append({
791
+ "role": "assistant",
792
+ "content": answer,
793
+ "metadata": {"mode": "cloud"},
794
+ "citations": [],
795
+ })
796
+ else:
797
+ # Display answer
798
+ st.markdown(response.answer)
799
+
800
+ # Build metadata
801
+ metadata = {
802
+ "latency_ms": response.latency_ms,
803
+ "num_sources": response.num_sources,
804
+ "confidence": response.confidence,
805
+ "validated": response.validated,
806
+ }
807
+
808
+ # Display metrics
809
+ if show_metrics:
810
+ m_cols = st.columns(4)
811
+ with m_cols[0]:
812
+ st.markdown(f'<div class="metric-box"><div class="metric-value">{response.latency_ms:.0f}ms</div><div class="metric-label">Latency</div></div>', unsafe_allow_html=True)
813
+ with m_cols[1]:
814
+ st.markdown(f'<div class="metric-box"><div class="metric-value">{response.num_sources}</div><div class="metric-label">Sources</div></div>', unsafe_allow_html=True)
815
+ with m_cols[2]:
816
+ conf_color = "#4ECDC4" if response.confidence > 0.6 else "#ffc107" if response.confidence > 0.3 else "#dc3545"
817
+ st.markdown(f'<div class="metric-box"><div class="metric-value" style="color:{conf_color}">{response.confidence:.0%}</div><div class="metric-label">Confidence</div></div>', unsafe_allow_html=True)
818
+ with m_cols[3]:
819
+ val_icon = "" if response.validated else "?"
820
+ st.markdown(f'<div class="metric-box"><div class="metric-value">{val_icon}</div><div class="metric-label">Validated</div></div>', unsafe_allow_html=True)
821
+
822
+ # Display sources
823
+ citations = []
824
+ if show_sources and response.citations:
825
+ with st.expander(f"Sources ({len(response.citations)})"):
826
+ for i, cite in enumerate(response.citations):
827
+ color = get_chunk_color(i)
828
+ citations.append({
829
+ "index": cite.index,
830
+ "relevance_score": cite.relevance_score,
831
+ "text_snippet": cite.text_snippet,
832
+ })
833
+ st.markdown(f"""
834
+ <div class="source-card" style="border-left-color: {color};">
835
+ <div class="source-header">
836
+ <strong>[{cite.index}]</strong> • Relevance: {cite.relevance_score:.0%}
837
+ </div>
838
+ <div class="source-text">{cite.text_snippet[:300]}...</div>
839
  </div>
840
+ """, unsafe_allow_html=True)
841
+
842
+ # Chunk preview (semantic search) - only for non-cloud mode
843
+ if show_chunk_preview and not is_cloud_response:
844
+ with st.expander("Chunk Preview (Top Matches)"):
845
+ chunks = search_similar_chunks(
846
+ prompt,
847
+ top_k=5,
848
+ doc_filter=st.session_state.doc_filter
849
+ )
850
+ for i, chunk in enumerate(chunks):
851
+ sim = chunk.get("similarity", 0)
852
+ color = "#4ECDC4" if sim > 0.7 else "#ffc107" if sim > 0.5 else "#8b949e"
853
+ st.markdown(f"""
854
+ <div class="chunk-preview" style="border-left: 3px solid {color};">
855
+ <div style="font-size: 10px; color: #8b949e;">
856
+ Similarity: <span style="color: {color};">{sim:.0%}</span> |
857
+ Doc: {chunk.get('document_id', 'N/A')[:15]}...
858
+ </div>
859
+ <div style="margin-top: 4px;">{chunk.get('text', '')[:200]}...</div>
860
  </div>
861
+ """, unsafe_allow_html=True)
 
 
862
 
863
+ # Save to history
864
+ st.session_state.messages.append({
865
+ "role": "assistant",
866
+ "content": response.answer,
867
+ "citations": citations,
868
+ "metadata": metadata,
869
+ })
870
 
871
  # Dynamic suggested questions based on document content
872
  st.markdown("---")
 
881
 
882
  # Display as clickable buttons
883
  sample_cols = st.columns(len(dynamic_questions))
884
+ # Enable suggested questions in both RAG ready and cloud mode
885
+ can_query = rag_system["status"] in ["ready", "cloud"]
886
  for i, q in enumerate(dynamic_questions):
887
  with sample_cols[i]:
888
  # Truncate long questions for button display
889
  display_q = q if len(q) <= 35 else q[:32] + "..."
890
  if st.button(display_q, key=f"sample_{i}", use_container_width=True,
891
+ disabled=not can_query,
892
  help=q if len(q) > 35 else None):
893
  st.session_state.messages.append({"role": "user", "content": q})
894
  st.rerun()
 
896
  # Show hint about dynamic questions
897
  if stats.get('total_chunks', 0) > 0:
898
  st.caption("📌 Questions are generated based on your indexed documents")
899
+ elif rag_mode == "cloud":
900
+ st.caption("☁️ Cloud mode - try asking any question")
901
 
902
  # Architecture info
903
  with st.expander("Multi-Agent RAG Architecture"):
demo/rag_config.py CHANGED
@@ -3,11 +3,16 @@ Unified RAG Configuration for SPARKNET Demo
3
 
4
  This module provides a single source of truth for RAG system configuration,
5
  ensuring all demo pages use the same vector store, embeddings, and models.
 
 
 
 
6
  """
7
 
8
  import streamlit as st
9
  from pathlib import Path
10
  import sys
 
11
 
12
  PROJECT_ROOT = Path(__file__).parent.parent
13
  sys.path.insert(0, str(PROJECT_ROOT))
@@ -22,11 +27,21 @@ EMBEDDING_MODELS = ["nomic-embed-text", "mxbai-embed-large:latest", "mxbai-embed
22
  LLM_MODELS = ["llama3.2:latest", "llama3.1:8b", "mistral:latest", "qwen2.5:14b", "qwen2.5:32b"]
23
 
24
 
 
 
 
 
 
 
 
 
 
 
25
  def check_ollama():
26
  """Check Ollama availability and get available models."""
27
  try:
28
  import httpx
29
- with httpx.Client(timeout=5.0) as client:
30
  resp = client.get(f"{OLLAMA_BASE_URL}/api/tags")
31
  if resp.status_code == 200:
32
  models = [m["name"] for m in resp.json().get("models", [])]
@@ -41,114 +56,167 @@ def select_model(available_models: list, preferred_models: list) -> str:
41
  for model in preferred_models:
42
  if model in available_models:
43
  return model
44
- # Return first preference as fallback
45
  return preferred_models[0] if preferred_models else "llama3.2:latest"
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  @st.cache_resource
49
  def get_unified_rag_system():
50
  """
51
  Initialize and return the unified RAG system.
52
 
53
  This is cached at the Streamlit level so all pages share the same instance.
 
54
  """
 
55
  try:
56
- # Check for required dependencies first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  try:
58
- import pydantic
59
- except ImportError:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  return {
61
- "status": "error",
62
- "error": "Required dependency 'pydantic' is not installed. Please check requirements.txt.",
63
- "rag": None,
64
- "store": None,
65
- "embedder": None,
 
 
 
 
66
  }
67
-
68
- from src.rag.agentic import AgenticRAG, RAGConfig
69
- from src.rag.store import get_vector_store, VectorStoreConfig, reset_vector_store
70
- from src.rag.embeddings import get_embedding_adapter, EmbeddingConfig, reset_embedding_adapter
71
-
72
- # Check Ollama
73
- ollama_ok, available_models = check_ollama()
74
- if not ollama_ok:
75
  return {
76
  "status": "error",
77
- "error": "Ollama is not running. Please start Ollama first.",
78
  "rag": None,
79
  "store": None,
80
  "embedder": None,
 
81
  }
82
 
83
- # Select models
84
- embed_model = select_model(available_models, EMBEDDING_MODELS)
85
- llm_model = select_model(available_models, LLM_MODELS)
86
-
87
- # Reset singletons to ensure fresh config
88
- reset_vector_store()
89
- reset_embedding_adapter()
90
-
91
- # Initialize embedding adapter
92
- embed_config = EmbeddingConfig(
93
- ollama_model=embed_model,
94
- ollama_base_url=OLLAMA_BASE_URL,
95
- )
96
- embedder = get_embedding_adapter(config=embed_config)
97
-
98
- # Initialize vector store
99
- store_config = VectorStoreConfig(
100
- persist_directory=VECTOR_STORE_PATH,
101
- collection_name=COLLECTION_NAME,
102
- similarity_threshold=0.0, # No threshold - let reranker handle filtering
103
- )
104
- store = get_vector_store(config=store_config)
105
-
106
- # Initialize RAG config
107
- rag_config = RAGConfig(
108
- model=llm_model,
109
- base_url=OLLAMA_BASE_URL,
110
- max_revision_attempts=1,
111
- enable_query_planning=True,
112
- enable_reranking=True,
113
- enable_validation=True,
114
- retrieval_top_k=10,
115
- final_top_k=5,
116
- min_confidence=0.3,
117
- verbose=False,
118
- )
119
-
120
- # Initialize RAG system
121
- rag = AgenticRAG(
122
- config=rag_config,
123
- vector_store=store,
124
- embedding_adapter=embedder,
125
- )
126
-
127
  return {
128
- "status": "ready",
129
  "error": None,
130
- "rag": rag,
131
- "store": store,
132
- "embedder": embedder,
133
- "embed_model": embed_model,
134
- "llm_model": llm_model,
135
- "available_models": available_models,
136
  }
137
 
138
- except Exception as e:
139
- import traceback
140
  return {
141
- "status": "error",
142
- "error": f"{str(e)}\n{traceback.format_exc()}",
143
  "rag": None,
144
  "store": None,
145
  "embedder": None,
 
146
  }
147
 
148
 
149
  def get_store_stats():
150
  """Get current vector store statistics."""
151
  system = get_unified_rag_system()
 
 
 
 
 
 
 
 
152
  if system["status"] != "ready":
153
  return {"total_chunks": 0, "status": "error"}
154
 
@@ -166,8 +234,12 @@ def get_store_stats():
166
  def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
167
  """Index a document into the unified RAG system."""
168
  system = get_unified_rag_system()
 
 
 
 
169
  if system["status"] != "ready":
170
- return {"success": False, "error": system["error"], "num_chunks": 0}
171
 
172
  try:
173
  num_chunks = system["rag"].index_text(
@@ -183,8 +255,17 @@ def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
183
  def query_rag(question: str, filters: dict = None):
184
  """Query the unified RAG system."""
185
  system = get_unified_rag_system()
 
 
 
 
 
 
 
 
 
186
  if system["status"] != "ready":
187
- return None, system["error"]
188
 
189
  try:
190
  response = system["rag"].query(question, filters=filters)
@@ -195,7 +276,6 @@ def query_rag(question: str, filters: dict = None):
195
 
196
  def clear_index():
197
  """Clear the vector store index."""
198
- # Force reinitialization by clearing cache
199
  get_unified_rag_system.clear()
200
  return True
201
 
@@ -207,16 +287,13 @@ def get_indexed_documents() -> list:
207
  return []
208
 
209
  try:
210
- # Query ChromaDB for unique document IDs
211
  store = system["store"]
212
  collection = store._collection
213
 
214
- # Get all metadata to extract unique document_ids
215
  results = collection.get(include=["metadatas"])
216
  if not results or not results.get("metadatas"):
217
  return []
218
 
219
- doc_ids = set()
220
  doc_info = {}
221
  for meta in results["metadatas"]:
222
  doc_id = meta.get("document_id", "unknown")
@@ -243,7 +320,6 @@ def get_chunks_for_document(document_id: str) -> list:
243
  store = system["store"]
244
  collection = store._collection
245
 
246
- # Query for chunks with this document_id
247
  results = collection.get(
248
  where={"document_id": document_id},
249
  include=["documents", "metadatas"]
@@ -275,15 +351,12 @@ def search_similar_chunks(query: str, top_k: int = 5, doc_filter: str = None):
275
  embedder = system["embedder"]
276
  store = system["store"]
277
 
278
- # Generate query embedding
279
  query_embedding = embedder.embed_text(query)
280
 
281
- # Build filter
282
  filters = None
283
  if doc_filter:
284
  filters = {"document_id": doc_filter}
285
 
286
- # Search
287
  results = store.search(
288
  query_embedding=query_embedding,
289
  top_k=top_k,
@@ -312,7 +385,6 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
312
  return {"error": "RAG system not ready", "similarity": 0.0}
313
 
314
  try:
315
- # Get chunks for both documents
316
  chunks_1 = get_chunks_for_document(doc_id_1)
317
  chunks_2 = get_chunks_for_document(doc_id_2)
318
 
@@ -321,15 +393,13 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
321
 
322
  embedder = system["embedder"]
323
 
324
- # Compute average embeddings for each document
325
  def avg_embedding(chunks):
326
  embeddings = []
327
- for chunk in chunks[:10]: # Limit to first 10 chunks
328
  emb = embedder.embed_text(chunk["text"])
329
  embeddings.append(emb)
330
  if not embeddings:
331
  return None
332
- # Average
333
  import numpy as np
334
  return np.mean(embeddings, axis=0).tolist()
335
 
@@ -339,7 +409,6 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
339
  if emb1 is None or emb2 is None:
340
  return {"error": "Could not compute embeddings", "similarity": 0.0}
341
 
342
- # Compute cosine similarity
343
  import numpy as np
344
  emb1 = np.array(emb1)
345
  emb2 = np.array(emb2)
@@ -358,11 +427,12 @@ def compute_document_similarity(doc_id_1: str, doc_id_2: str) -> dict:
358
  def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata: dict = None):
359
  """
360
  Auto-index a processed document with pre-computed chunks.
361
-
362
- This is called after document processing completes to immediately
363
- make the document available in RAG.
364
  """
365
  system = get_unified_rag_system()
 
 
 
 
366
  if system["status"] != "ready":
367
  return {"success": False, "error": "RAG system not ready", "num_chunks": 0}
368
 
@@ -370,7 +440,6 @@ def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata
370
  store = system["store"]
371
  embedder = system["embedder"]
372
 
373
- # Prepare chunks for indexing
374
  chunk_dicts = []
375
  embeddings = []
376
 
@@ -392,14 +461,12 @@ def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata
392
  }
393
  chunk_dicts.append(chunk_dict)
394
 
395
- # Generate embedding
396
  embedding = embedder.embed_text(chunk_text)
397
  embeddings.append(embedding)
398
 
399
  if not chunk_dicts:
400
  return {"success": False, "error": "No valid chunks to index", "num_chunks": 0}
401
 
402
- # Add to store
403
  store.add_chunks(chunk_dicts, embeddings)
404
 
405
  return {"success": True, "num_chunks": len(chunk_dicts), "error": None}
 
3
 
4
  This module provides a single source of truth for RAG system configuration,
5
  ensuring all demo pages use the same vector store, embeddings, and models.
6
+
7
+ Supports both:
8
+ 1. Local Ollama (for on-premise deployments)
9
+ 2. Cloud LLM providers (for Streamlit Cloud)
10
  """
11
 
12
  import streamlit as st
13
  from pathlib import Path
14
  import sys
15
+ import os
16
 
17
  PROJECT_ROOT = Path(__file__).parent.parent
18
  sys.path.insert(0, str(PROJECT_ROOT))
 
27
  LLM_MODELS = ["llama3.2:latest", "llama3.1:8b", "mistral:latest", "qwen2.5:14b", "qwen2.5:32b"]
28
 
29
 
30
+ def get_secret(key: str, default: str = None):
31
+ """Get secret from Streamlit secrets or environment."""
32
+ try:
33
+ if hasattr(st, 'secrets') and key in st.secrets:
34
+ return st.secrets[key]
35
+ except:
36
+ pass
37
+ return os.environ.get(key, default)
38
+
39
+
40
  def check_ollama():
41
  """Check Ollama availability and get available models."""
42
  try:
43
  import httpx
44
+ with httpx.Client(timeout=3.0) as client:
45
  resp = client.get(f"{OLLAMA_BASE_URL}/api/tags")
46
  if resp.status_code == 200:
47
  models = [m["name"] for m in resp.json().get("models", [])]
 
56
  for model in preferred_models:
57
  if model in available_models:
58
  return model
 
59
  return preferred_models[0] if preferred_models else "llama3.2:latest"
60
 
61
 
62
+ def check_cloud_providers():
63
+ """Check which cloud LLM providers are available."""
64
+ providers = {}
65
+
66
+ if get_secret("GROQ_API_KEY"):
67
+ providers["groq"] = True
68
+ if get_secret("GOOGLE_API_KEY"):
69
+ providers["google"] = True
70
+ if get_secret("OPENROUTER_API_KEY"):
71
+ providers["openrouter"] = True
72
+ if get_secret("HF_TOKEN"):
73
+ providers["huggingface"] = True
74
+ if get_secret("GITHUB_TOKEN"):
75
+ providers["github"] = True
76
+ if get_secret("MISTRAL_API_KEY"):
77
+ providers["mistral"] = True
78
+
79
+ return providers
80
+
81
+
82
  @st.cache_resource
83
  def get_unified_rag_system():
84
  """
85
  Initialize and return the unified RAG system.
86
 
87
  This is cached at the Streamlit level so all pages share the same instance.
88
+ Supports both Ollama (local) and cloud providers (Streamlit Cloud).
89
  """
90
+ # Check for required dependencies first
91
  try:
92
+ import pydantic
93
+ except ImportError:
94
+ return {
95
+ "status": "error",
96
+ "error": "Required dependency 'pydantic' is not installed.",
97
+ "rag": None,
98
+ "store": None,
99
+ "embedder": None,
100
+ "mode": "error",
101
+ }
102
+
103
+ # Check Ollama availability
104
+ ollama_ok, available_models = check_ollama()
105
+
106
+ # Check cloud providers
107
+ cloud_providers = check_cloud_providers()
108
+
109
+ if ollama_ok:
110
+ # Use Ollama for full RAG functionality
111
  try:
112
+ from src.rag.agentic import AgenticRAG, RAGConfig
113
+ from src.rag.store import get_vector_store, VectorStoreConfig, reset_vector_store
114
+ from src.rag.embeddings import get_embedding_adapter, EmbeddingConfig, reset_embedding_adapter
115
+
116
+ # Select models
117
+ embed_model = select_model(available_models, EMBEDDING_MODELS)
118
+ llm_model = select_model(available_models, LLM_MODELS)
119
+
120
+ # Reset singletons to ensure fresh config
121
+ reset_vector_store()
122
+ reset_embedding_adapter()
123
+
124
+ # Initialize embedding adapter
125
+ embed_config = EmbeddingConfig(
126
+ ollama_model=embed_model,
127
+ ollama_base_url=OLLAMA_BASE_URL,
128
+ )
129
+ embedder = get_embedding_adapter(config=embed_config)
130
+
131
+ # Initialize vector store
132
+ store_config = VectorStoreConfig(
133
+ persist_directory=VECTOR_STORE_PATH,
134
+ collection_name=COLLECTION_NAME,
135
+ similarity_threshold=0.0,
136
+ )
137
+ store = get_vector_store(config=store_config)
138
+
139
+ # Initialize RAG config
140
+ rag_config = RAGConfig(
141
+ model=llm_model,
142
+ base_url=OLLAMA_BASE_URL,
143
+ max_revision_attempts=1,
144
+ enable_query_planning=True,
145
+ enable_reranking=True,
146
+ enable_validation=True,
147
+ retrieval_top_k=10,
148
+ final_top_k=5,
149
+ min_confidence=0.3,
150
+ verbose=False,
151
+ )
152
+
153
+ # Initialize RAG system
154
+ rag = AgenticRAG(
155
+ config=rag_config,
156
+ vector_store=store,
157
+ embedding_adapter=embedder,
158
+ )
159
+
160
  return {
161
+ "status": "ready",
162
+ "error": None,
163
+ "rag": rag,
164
+ "store": store,
165
+ "embedder": embedder,
166
+ "embed_model": embed_model,
167
+ "llm_model": llm_model,
168
+ "available_models": available_models,
169
+ "mode": "ollama",
170
  }
171
+ except Exception as e:
172
+ import traceback
 
 
 
 
 
 
173
  return {
174
  "status": "error",
175
+ "error": f"Ollama RAG init failed: {str(e)}",
176
  "rag": None,
177
  "store": None,
178
  "embedder": None,
179
+ "mode": "error",
180
  }
181
 
182
+ elif cloud_providers:
183
+ # Cloud mode - use cloud LLM providers
184
+ # RAG with vector store requires local processing
185
+ # but we can still do basic document Q&A with cloud LLMs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  return {
187
+ "status": "cloud",
188
  "error": None,
189
+ "rag": None,
190
+ "store": None,
191
+ "embedder": None,
192
+ "mode": "cloud",
193
+ "providers": list(cloud_providers.keys()),
194
+ "message": "Running in cloud mode. Document Q&A available via cloud LLM providers.",
195
  }
196
 
197
+ else:
198
+ # No backend available
199
  return {
200
+ "status": "demo",
201
+ "error": "No LLM backend configured. Add API keys to secrets.toml or start Ollama.",
202
  "rag": None,
203
  "store": None,
204
  "embedder": None,
205
+ "mode": "demo",
206
  }
207
 
208
 
209
  def get_store_stats():
210
  """Get current vector store statistics."""
211
  system = get_unified_rag_system()
212
+
213
+ if system["mode"] == "cloud":
214
+ return {
215
+ "total_chunks": 0,
216
+ "status": "cloud",
217
+ "message": "Cloud mode - indexing requires Ollama",
218
+ }
219
+
220
  if system["status"] != "ready":
221
  return {"total_chunks": 0, "status": "error"}
222
 
 
234
  def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
235
  """Index a document into the unified RAG system."""
236
  system = get_unified_rag_system()
237
+
238
+ if system["mode"] == "cloud":
239
+ return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
240
+
241
  if system["status"] != "ready":
242
+ return {"success": False, "error": system.get("error", "RAG not ready"), "num_chunks": 0}
243
 
244
  try:
245
  num_chunks = system["rag"].index_text(
 
255
  def query_rag(question: str, filters: dict = None):
256
  """Query the unified RAG system."""
257
  system = get_unified_rag_system()
258
+
259
+ if system["mode"] == "cloud":
260
+ # Use cloud LLM for Q&A
261
+ from llm_providers import generate_response
262
+ response, error = generate_response(question)
263
+ if error:
264
+ return None, error
265
+ return {"answer": response, "sources": [], "mode": "cloud"}, None
266
+
267
  if system["status"] != "ready":
268
+ return None, system.get("error", "RAG not ready")
269
 
270
  try:
271
  response = system["rag"].query(question, filters=filters)
 
276
 
277
  def clear_index():
278
  """Clear the vector store index."""
 
279
  get_unified_rag_system.clear()
280
  return True
281
 
 
287
  return []
288
 
289
  try:
 
290
  store = system["store"]
291
  collection = store._collection
292
 
 
293
  results = collection.get(include=["metadatas"])
294
  if not results or not results.get("metadatas"):
295
  return []
296
 
 
297
  doc_info = {}
298
  for meta in results["metadatas"]:
299
  doc_id = meta.get("document_id", "unknown")
 
320
  store = system["store"]
321
  collection = store._collection
322
 
 
323
  results = collection.get(
324
  where={"document_id": document_id},
325
  include=["documents", "metadatas"]
 
351
  embedder = system["embedder"]
352
  store = system["store"]
353
 
 
354
  query_embedding = embedder.embed_text(query)
355
 
 
356
  filters = None
357
  if doc_filter:
358
  filters = {"document_id": doc_filter}
359
 
 
360
  results = store.search(
361
  query_embedding=query_embedding,
362
  top_k=top_k,
 
385
  return {"error": "RAG system not ready", "similarity": 0.0}
386
 
387
  try:
 
388
  chunks_1 = get_chunks_for_document(doc_id_1)
389
  chunks_2 = get_chunks_for_document(doc_id_2)
390
 
 
393
 
394
  embedder = system["embedder"]
395
 
 
396
  def avg_embedding(chunks):
397
  embeddings = []
398
+ for chunk in chunks[:10]:
399
  emb = embedder.embed_text(chunk["text"])
400
  embeddings.append(emb)
401
  if not embeddings:
402
  return None
 
403
  import numpy as np
404
  return np.mean(embeddings, axis=0).tolist()
405
 
 
409
  if emb1 is None or emb2 is None:
410
  return {"error": "Could not compute embeddings", "similarity": 0.0}
411
 
 
412
  import numpy as np
413
  emb1 = np.array(emb1)
414
  emb2 = np.array(emb2)
 
427
  def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata: dict = None):
428
  """
429
  Auto-index a processed document with pre-computed chunks.
 
 
 
430
  """
431
  system = get_unified_rag_system()
432
+
433
+ if system["mode"] == "cloud":
434
+ return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
435
+
436
  if system["status"] != "ready":
437
  return {"success": False, "error": "RAG system not ready", "num_chunks": 0}
438
 
 
440
  store = system["store"]
441
  embedder = system["embedder"]
442
 
 
443
  chunk_dicts = []
444
  embeddings = []
445
 
 
461
  }
462
  chunk_dicts.append(chunk_dict)
463
 
 
464
  embedding = embedder.embed_text(chunk_text)
465
  embeddings.append(embedding)
466
 
467
  if not chunk_dicts:
468
  return {"success": False, "error": "No valid chunks to index", "num_chunks": 0}
469
 
 
470
  store.add_chunks(chunk_dicts, embeddings)
471
 
472
  return {"success": True, "num_chunks": len(chunk_dicts), "error": None}
demo/requirements.txt CHANGED
@@ -1,19 +1,76 @@
1
- # SPARKNET Demo Requirements
2
- # Run: pip install -r demo/requirements.txt
3
 
4
- # Streamlit
 
 
5
  streamlit>=1.28.0
6
 
7
- # Data handling
8
- pandas>=2.0.0
9
- numpy>=1.24.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # HTTP client (for Ollama checks)
 
 
 
12
  httpx>=0.25.0
 
13
 
14
- # Image handling (optional, for advanced features)
15
- Pillow>=10.0.0
 
 
 
16
 
17
- # Charts (optional)
 
 
 
18
  plotly>=5.18.0
19
  altair>=5.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPARKNET Demo Requirements for Streamlit Cloud
2
+ # This file is used by Streamlit Cloud for deployment
3
 
4
+ # ==============================================================================
5
+ # Streamlit Web Framework
6
+ # ==============================================================================
7
  streamlit>=1.28.0
8
 
9
+ # ==============================================================================
10
+ # Data Validation & Configuration (REQUIRED)
11
+ # ==============================================================================
12
+ pydantic>=2.0.0
13
+ pydantic-settings>=2.0.0
14
+ pyyaml>=6.0
15
+ python-dotenv>=1.0.0
16
+ typing-extensions>=4.0.0
17
+
18
+ # ==============================================================================
19
+ # LLM Orchestration (LangChain Ecosystem)
20
+ # ==============================================================================
21
+ langchain>=0.1.0
22
+ langchain-community>=0.0.20
23
+ langchain-ollama>=0.0.1
24
+ langgraph>=0.0.20
25
+ ollama>=0.1.0
26
+
27
+ # ==============================================================================
28
+ # Vector Stores & Embeddings
29
+ # ==============================================================================
30
+ chromadb>=0.4.0
31
+ faiss-cpu>=1.7.4
32
+ sentence-transformers>=2.2.0
33
+
34
+ # ==============================================================================
35
+ # PDF & Document Processing
36
+ # ==============================================================================
37
+ pymupdf>=1.23.0
38
+ reportlab>=4.0.0
39
+
40
+ # ==============================================================================
41
+ # Observability & Logging
42
+ # ==============================================================================
43
+ loguru>=0.7.0
44
+ rich>=13.0.0
45
 
46
+ # ==============================================================================
47
+ # Web & HTTP
48
+ # ==============================================================================
49
+ requests>=2.31.0
50
  httpx>=0.25.0
51
+ beautifulsoup4>=4.12.0
52
 
53
+ # ==============================================================================
54
+ # Data Handling
55
+ # ==============================================================================
56
+ pandas>=2.0.0
57
+ numpy>=1.24.0
58
 
59
+ # ==============================================================================
60
+ # Image & Charts
61
+ # ==============================================================================
62
+ Pillow>=10.0.0
63
  plotly>=5.18.0
64
  altair>=5.2.0
65
+
66
+ # ==============================================================================
67
+ # System & Caching
68
+ # ==============================================================================
69
+ psutil>=5.9.0
70
+ cachetools>=5.3.0
71
+ tenacity>=8.2.0
72
+
73
+ # ==============================================================================
74
+ # Workflow
75
+ # ==============================================================================
76
+ networkx>=3.0
demo/state_manager.py CHANGED
@@ -654,14 +654,18 @@ def render_global_status_bar():
654
 
655
  # Import RAG config for additional status
656
  try:
657
- from rag_config import get_unified_rag_system, check_ollama
658
  rag_system = get_unified_rag_system()
659
  ollama_ok, models = check_ollama()
 
660
  rag_status = rag_system["status"]
 
661
  llm_model = rag_system.get("llm_model", "N/A")
662
  except:
663
  ollama_ok = False
 
664
  rag_status = "error"
 
665
  llm_model = "N/A"
666
  models = []
667
 
@@ -671,17 +675,29 @@ def render_global_status_bar():
671
  with cols[0]:
672
  if ollama_ok:
673
  st.success(f"Ollama ({len(models)})")
 
 
674
  else:
675
- st.error("Ollama Offline")
676
 
677
  with cols[1]:
678
  if rag_status == "ready":
679
  st.success("RAG Ready")
 
 
 
 
680
  else:
681
  st.error("RAG Error")
682
 
683
  with cols[2]:
684
- st.info(f"{llm_model.split(':')[0]}")
 
 
 
 
 
 
685
 
686
  with cols[3]:
687
  st.info(f"{summary['total_documents']} Docs")
 
654
 
655
  # Import RAG config for additional status
656
  try:
657
+ from rag_config import get_unified_rag_system, check_ollama, check_cloud_providers
658
  rag_system = get_unified_rag_system()
659
  ollama_ok, models = check_ollama()
660
+ cloud_providers = check_cloud_providers()
661
  rag_status = rag_system["status"]
662
+ rag_mode = rag_system.get("mode", "error")
663
  llm_model = rag_system.get("llm_model", "N/A")
664
  except:
665
  ollama_ok = False
666
+ cloud_providers = {}
667
  rag_status = "error"
668
+ rag_mode = "error"
669
  llm_model = "N/A"
670
  models = []
671
 
 
675
  with cols[0]:
676
  if ollama_ok:
677
  st.success(f"Ollama ({len(models)})")
678
+ elif cloud_providers:
679
+ st.info(f"Cloud ({len(cloud_providers)})")
680
  else:
681
+ st.warning("Demo Mode")
682
 
683
  with cols[1]:
684
  if rag_status == "ready":
685
  st.success("RAG Ready")
686
+ elif rag_mode == "cloud":
687
+ st.info("Cloud LLM")
688
+ elif rag_mode == "demo":
689
+ st.warning("Demo Mode")
690
  else:
691
  st.error("RAG Error")
692
 
693
  with cols[2]:
694
+ if rag_mode == "cloud" and cloud_providers:
695
+ provider_name = list(cloud_providers.keys())[0].title()
696
+ st.info(f"{provider_name}")
697
+ elif llm_model != "N/A":
698
+ st.info(f"{llm_model.split(':')[0]}")
699
+ else:
700
+ st.info("Offline")
701
 
702
  with cols[3]:
703
  st.info(f"{summary['total_documents']} Docs")