Upload 4 files
- app.py +141 -0
- main.py +75 -0
- packages.txt +1 -0
- requirements.txt +14 -3
app.py
ADDED
@@ -0,0 +1,141 @@
import streamlit as st
import os
import time
from src.retrieval import RetrievalEngine

# --- PAGE CONFIGURATION ---
st.set_page_config(
    page_title="Pro RAG Enterprise",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# --- CUSTOM CSS ---
st.markdown("""
<style>
    .stChatInputContainer {
        padding-bottom: 20px;
    }
    .block-container {
        padding-top: 30px;
    }
    h1 {
        color: #0F172A;
    }
    .stSidebar {
        background-color: #F8FAFC;
        border-right: 1px solid #E2E8F0;
    }
    /* Status Badge Style */
    .status-badge {
        padding: 4px 8px;
        border-radius: 4px;
        font-size: 0.8em;
        font-weight: bold;
    }
</style>
""", unsafe_allow_html=True)

# --- 1. INITIALIZE ENGINE (Cached) ---
@st.cache_resource
def get_engine():
    return RetrievalEngine()

# Initialize and check the connection type
try:
    engine = get_engine()

    # Check env vars to see where we are connected
    if os.getenv("QDRANT_URL"):
        conn_type = "☁️ Qdrant Cloud"
        status_color = "green"
    else:
        conn_type = "🏠 Local Docker"
        status_color = "orange"

    db_status = f"{conn_type} Connected"

except Exception as e:
    engine = None
    db_status = f"❌ Error: {e}"
    status_color = "red"

# --- 2. SIDEBAR (The Control Panel) ---
with st.sidebar:
    st.title("🎛️ Control Panel")

    # Connection status
    st.markdown(f"**System Status:** :{status_color}[{db_status}]")
    st.divider()

    # Mode selection
    st.subheader("🔍 Search Mode")
    mode_display = {
        "Global Search (All Data)": "all",
        "📄 PDF Documents (Financials)": "pdf",
        "📊 Structured Data (Excel/CSV)": "csv",
        "🖼️ Visual Intelligence (Graphs)": "visual"
    }

    selected_mode_label = st.selectbox(
        "Select Knowledge Source:",
        list(mode_display.keys()),
        index=0
    )
    # Convert the label back to the backend keyword
    filter_mode = mode_display[selected_mode_label]

    st.info(
        f"""
        **Current Focus:** {selected_mode_label}

        *The engine filters retrieval to strictly match this data type.*
        """
    )

    st.divider()
    if st.button("🗑️ Clear Chat History"):
        st.session_state.messages = []
        st.rerun()

# --- 3. MAIN CHAT INTERFACE ---

st.title("🤖 Enterprise Knowledge Assistant")
st.caption("Level 1 Pro RAG System | Powered by Qdrant & GPT-4o")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display previous messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# --- 4. HANDLE USER INPUT ---
if prompt := st.chat_input("Ask a question about your data..."):
    # A. Display the user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # B. Generate the AI response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()

        with st.spinner(f"Searching {selected_mode_label}..."):
            try:
                # Call the backend
                response_text = engine.query(prompt, filter_type=filter_mode)

                # Display the response
                message_placeholder.markdown(response_text)

            except Exception as e:
                error_msg = f"❌ System Error: {str(e)}"
                message_placeholder.error(error_msg)
                response_text = error_msg

    # C. Save the AI message
    st.session_state.messages.append({"role": "assistant", "content": response_text})
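A note on the backend contract: app.py only assumes that src.retrieval.RetrievalEngine exposes a query(prompt, filter_type=...) method returning a string. src/retrieval.py is not part of this commit, so the sketch below is a hypothetical stand-in matching that contract, useful for smoke-testing the UI without a live Qdrant or OpenAI connection (on a Space the app is served automatically; locally it would be launched with streamlit run app.py):

# Hypothetical stand-in for src/retrieval.py, for local UI testing only.
# The real engine (not included in this commit) is expected to search
# Qdrant and generate an answer with GPT-4o.
class RetrievalEngine:
    def query(self, prompt: str, filter_type: str = "all") -> str:
        # Echo the inputs so the chat loop can be exercised end to end.
        return f"[stub | filter={filter_type}] You asked: {prompt}"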
main.py
ADDED
@@ -0,0 +1,75 @@
import sys
from src.database import VectorDB
from src.ingestion import IngestionManager
from src.chunking import ChunkingManager
from src.indexing import IndexerManager
from src.retrieval import RetrievalEngine

def run_ingestion_pipeline():
    """Runs the full ETL pipeline (Ingest -> Chunk -> Index)."""
    print("🚀 Starting Pro RAG Ingestion Pipeline...")

    # 1. DB setup
    db = VectorDB(collection_name="pro_rag_container")
    db.create_collection()

    # 2. Ingest
    ingestion = IngestionManager()
    raw_docs = ingestion.process_all_data()
    if not raw_docs:
        return

    # 3. Chunk
    chunker = ChunkingManager()
    processed_chunks = chunker.chunk_documents(raw_docs)

    # 4. Index
    indexer = IndexerManager(collection_name="pro_rag_container")
    indexer.index_documents(processed_chunks)
    print("\n🎉 Pipeline Complete.")

def start_chat_mode():
    print("\n💬 Entering Chat Mode... (Type 'exit' to quit)")
    print("Commands: Type 'mode:pdf', 'mode:csv', 'mode:visual' or 'mode:all' to switch filters.")

    engine = RetrievalEngine()
    current_mode = "all"

    while True:
        try:
            query = input(f"\nUser ({current_mode.upper()}): ")
            if query.lower() in ["exit", "quit", "q"]:
                break

            # Mode switcher logic
            if query.startswith("mode:"):
                new_mode = query.split(":")[1].strip()
                if new_mode in ["pdf", "csv", "visual", "all"]:
                    current_mode = new_mode
                    print(f"🔄 Switched filter to: {current_mode.upper()}")
                else:
                    print("❌ Invalid mode. Use: pdf, csv, visual, all")
                continue

            if not query.strip():
                continue

            # Pass the filter to the engine
            response = engine.query(query, filter_type=current_mode)

            print(f"\n🤖 AI Assistant:\n{response}")
            print("-" * 50)

        except Exception as e:
            print(f"❌ Error: {e}")

if __name__ == "__main__":
    # Simple CLI argument to switch modes
    # Usage:
    #   python main.py setup   -> Runs ingestion
    #   python main.py         -> Runs chat
    if len(sys.argv) > 1 and sys.argv[1] == "setup":
        run_ingestion_pipeline()
    else:
        start_chat_mode()
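Both entry points pass filter_type through to engine.query; how that keyword becomes an actual retrieval restriction lives in src/retrieval.py, which this commit does not include. A plausible mapping onto a Qdrant payload filter, assuming indexed chunks carry a doc_type payload field (that field name and this helper are assumptions, not shown here), might look like:

# Sketch only: the payload key "doc_type" and build_filter() are assumptions,
# since src/retrieval.py is not part of this commit.
from qdrant_client import models

def build_filter(filter_type: str) -> models.Filter | None:
    if filter_type == "all":
        return None  # no filter: search across every document type
    return models.Filter(
        must=[
            models.FieldCondition(
                key="doc_type",
                match=models.MatchValue(value=filter_type),
            )
        ]
    )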
packages.txt
ADDED
@@ -0,0 +1 @@
poppler-utils
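On Hugging Face Spaces, packages.txt lists apt packages installed into the container. poppler-utils provides the pdftoppm/pdfinfo binaries that pdf2image (from requirements.txt) shells out to, so PDF page rendering fails without it. A quick sanity check, with a placeholder file path:

# pdf2image requires poppler-utils on the host; this raises
# PDFInfoNotInstalledError if poppler is missing.
from pdf2image import convert_from_path

pages = convert_from_path("data/sample.pdf", dpi=200)  # placeholder path
pages[0].save("page_1.png")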
requirements.txt
CHANGED
@@ -1,3 +1,14 @@
langchain
langchain-community
langchain-openai
langchain-qdrant
qdrant-client
pandas
openpyxl
pypdf
pdf2image
pillow
tiktoken
python-dotenv
unstructured
python-magic-bin
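One portability caveat: python-magic-bin ships prebuilt Windows libmagic binaries and typically cannot be installed in a Linux container such as a Space's. The usual Linux equivalent (an alternative, not what this commit ships) is:

# requirements.txt: replace python-magic-bin with
python-magic
# packages.txt: add the system library it wraps
libmagic1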