alihaiderscholar commited on
Commit
0d86b5b
·
verified ·
1 Parent(s): 4d66f65

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +141 -0
  2. main.py +75 -0
  3. packages.txt +1 -0
  4. requirements.txt +14 -3
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import os
import time
from src.retrieval import RetrievalEngine

# --- PAGE CONFIGURATION ---
# NOTE: set_page_config must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Pro RAG Enterprise",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# --- CUSTOM CSS ---
# Raw CSS injected with unsafe_allow_html to restyle built-in Streamlit
# widgets (chat input spacing, sidebar colors) beyond what the theme allows.
st.markdown("""
<style>
.stChatInputContainer {
    padding-bottom: 20px;
}
.block-container {
    padding-top: 30px;
}
h1 {
    color: #0F172A;
}
.stSidebar {
    background-color: #F8FAFC;
    border-right: 1px solid #E2E8F0;
}
/* Status Badge Style */
.status-badge {
    padding: 4px 8px;
    border-radius: 4px;
    font-size: 0.8em;
    font-weight: bold;
}
</style>
""", unsafe_allow_html=True)
39
+
40
# --- 1. INITIALIZE ENGINE (Cached) ---
@st.cache_resource
def get_engine():
    """Construct the retrieval engine once; cached across Streamlit reruns."""
    engine = RetrievalEngine()
    return engine
44
+
45
# Initialize and Check Connection Type
try:
    engine = get_engine()

    # A set QDRANT_URL env var means we are talking to Qdrant Cloud;
    # otherwise we assume the local Docker instance.
    is_cloud = bool(os.getenv("QDRANT_URL"))
    conn_type = "☁️ Qdrant Cloud" if is_cloud else "🏠 Local Docker"
    status_color = "green" if is_cloud else "orange"
    db_status = f"{conn_type} Connected"

except Exception as e:
    # Startup failed — record the error for the sidebar status badge.
    engine = None
    db_status = f"❌ Error: {e}"
    status_color = "red"
63
+
64
# --- 2. SIDEBAR (The Control Panel) ---
with st.sidebar:
    st.title("🎛️ Control Panel")

    # Connection Status badge (color + text computed at startup).
    st.markdown(f"**System Status:** :{status_color}[{db_status}]")
    st.divider()

    # Mode Selection: human-readable label -> backend filter keyword.
    st.subheader("🔍 Search Mode")
    mode_display = {
        "Global Search (All Data)": "all",
        "📄 PDF Documents (Financials)": "pdf",
        "📊 Structured Data (Excel/CSV)": "csv",
        "🖼️ Visual Intelligence (Graphs)": "visual",
    }
    selected_mode_label = st.selectbox(
        "Select Knowledge Source:",
        list(mode_display),
        index=0,
    )
    # Convert label back to backend keyword
    filter_mode = mode_display[selected_mode_label]

    st.info(
        f"""
        **Current Focus:** {selected_mode_label}

        *Engine filters retrieval to strictly match this data type.*
        """
    )

    st.divider()
    if st.button("🗑️ Clear Chat History"):
        st.session_state.messages = []
        st.rerun()
101
+
102
# --- 3. MAIN CHAT INTERFACE ---

st.title("🤖 Enterprise Knowledge Assistant")
st.caption("Level 1 Pro RAG System | Powered by Qdrant & GPT-4o")

# Initialize Chat History
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display Previous Messages (replayed on every Streamlit rerun)
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# --- 4. HANDLE USER INPUT ---
if prompt := st.chat_input("Ask a question about your data..."):
    # A. Display User Message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # B. Generate AI Response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()

        if engine is None:
            # FIX: the engine failed to initialize at startup; surface a clear
            # message instead of letting None.query() raise an opaque
            # "'NoneType' object has no attribute 'query'" AttributeError.
            response_text = f"❌ System Error: retrieval engine unavailable ({db_status})"
            message_placeholder.error(response_text)
        else:
            with st.spinner(f"Searching {selected_mode_label}..."):
                try:
                    # CALL THE BACKEND with the sidebar-selected filter.
                    response_text = engine.query(prompt, filter_type=filter_mode)

                    # Display response
                    message_placeholder.markdown(response_text)

                except Exception as e:
                    # Show backend failures inline rather than crashing the app.
                    error_msg = f"❌ System Error: {str(e)}"
                    message_placeholder.error(error_msg)
                    response_text = error_msg

    # C. Save AI Message
    st.session_state.messages.append({"role": "assistant", "content": response_text})
main.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys

# Project pipeline components (ingest -> chunk -> index -> retrieve).
# FIX: RetrievalEngine was imported twice; the duplicate import is removed.
from src.chunking import ChunkingManager
from src.database import VectorDB
from src.indexing import IndexerManager
from src.ingestion import IngestionManager
from src.retrieval import RetrievalEngine
9
def run_ingestion_pipeline(collection_name="pro_rag_container"):
    """Runs the full ETL pipeline (Ingest -> Chunk -> Index).

    Args:
        collection_name: Qdrant collection to (re)create and index into.
            Previously hard-coded in two places; a single parameter keeps
            the DB setup and the indexer pointed at the same collection.
    """
    print("🚀 Starting Pro RAG Ingestion Pipeline...")

    # 1. DB Setup
    db = VectorDB(collection_name=collection_name)
    db.create_collection()

    # 2. Ingest
    ingestion = IngestionManager()
    raw_docs = ingestion.process_all_data()
    if not raw_docs:
        # FIX: was a silent return — say why the pipeline stopped.
        print("⚠️ No documents found to ingest; aborting pipeline.")
        return

    # 3. Chunk
    chunker = ChunkingManager()
    processed_chunks = chunker.chunk_documents(raw_docs)

    # 4. Index (targets the same collection created above)
    indexer = IndexerManager(collection_name=collection_name)
    indexer.index_documents(processed_chunks)
    print("\n🎉 Pipeline Complete.")
30
+
31
def start_chat_mode():
    """Interactive chat REPL over the retrieval engine.

    Supports a per-session retrieval filter switched with 'mode:pdf',
    'mode:csv', 'mode:visual' or 'mode:all'. Exits on 'exit'/'quit'/'q',
    Ctrl+C, or end of input.
    """
    print("\n💬 Entering Chat Mode... (Type 'exit' to quit)")
    print("Commands: Type 'mode:pdf', 'mode:csv', 'mode:visual' or 'mode:all' to switch filters.")

    engine = RetrievalEngine()
    current_mode = "all"

    while True:
        # FIX: read input outside the broad except. Previously an exhausted
        # stdin (piped input, Ctrl+D) raised EOFError inside `except
        # Exception`, printing "❌ Error: EOF..." in an infinite loop, and
        # Ctrl+C escaped with a raw traceback.
        try:
            query = input(f"\nUser ({current_mode.upper()}): ")
        except (EOFError, KeyboardInterrupt):
            print()  # keep the shell prompt on a fresh line
            break

        try:
            if query.lower() in ["exit", "quit", "q"]:
                break

            # Mode Switcher Logic
            if query.startswith("mode:"):
                new_mode = query.split(":")[1].strip()
                if new_mode in ["pdf", "csv", "visual", "all"]:
                    current_mode = new_mode
                    print(f"🔄 Switched filter to: {current_mode.upper()}")
                else:
                    print("❌ Invalid mode. Use: pdf, csv, visual, all")
                continue

            if not query.strip():
                continue

            # Pass the filter to the engine
            response = engine.query(query, filter_type=current_mode)

            print(f"\n🤖 AI Assistant:\n{response}")
            print("-" * 50)

        except Exception as e:
            # Best-effort REPL: report the error and keep the session alive.
            print(f"❌ Error: {e}")
65
+
66
+ if __name__ == "__main__":
67
+ # Simple CLI argument to switch modes
68
+ # Usage:
69
+ # python main.py setup -> Runs Ingestion
70
+ # python main.py -> Runs Chat
71
+
72
+ if len(sys.argv) > 1 and sys.argv[1] == "setup":
73
+ run_ingestion_pipeline()
74
+ else:
75
+ start_chat_mode()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ poppler-utils
requirements.txt CHANGED
@@ -1,3 +1,14 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ langchain-openai
4
+ langchain-qdrant
5
+ qdrant-client
6
+ pandas
7
+ openpyxl
8
+ pypdf
9
+ pdf2image
10
+ pillow
11
+ tiktoken
12
+ python-dotenv
13
+ unstructured
14
+ python-magic-bin