File size: 6,140 Bytes
9c4c212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
Hugging Face Spaces - Enterprise RAG System
Standalone Streamlit application
"""

import streamlit as st
import os
import sys



# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from dotenv import load_dotenv
load_dotenv()

from src.pipeline.query_pipeline import QueryPipeline
from src.ingestion.ingest import IngestionPipeline
import subprocess

def prepare_data():
    """Ensure the local data directories and index files are available.

    Steps:
      1. Create ``data/index`` and ``data/raw`` if they do not exist.
      2. Unless the ``VECTOR_DB_TYPE`` environment variable equals
         ``pinecone`` (case-insensitive), require ``data/raw`` to be
         non-empty; otherwise show an error and halt the script run with
         ``st.stop()``.
      3. If ``data/index/bm25.pkl`` is missing, try to download
         ``index/bm25.pkl`` and ``index/doc_map.pkl`` from the
         ``yuvis/enterprise-rag-index`` dataset on the Hugging Face Hub
         into ``data``. The download is best-effort: a failure is shown as
         a warning and does not raise.
    """
    index_dir = "data/index"
    raw_dir = "data/raw"

    # 1. Create directories
    for directory in (index_dir, raw_dir):
        os.makedirs(directory, exist_ok=True)

    # 2. Check if raw data exists (only needed if NOT using Pinecone)
    using_pinecone = os.getenv("VECTOR_DB_TYPE", "").lower() == "pinecone"
    if not using_pinecone and not os.listdir(raw_dir):
        st.error("❌ Data folder empty! Please commit your 'data/raw' folder to Git and redeploy.")
        st.stop()

    # 3. Check if indices exist (files should be baked in)
    bm25_path = os.path.join(index_dir, "bm25.pkl")
    if os.path.exists(bm25_path):
        return

    # Only download if absolutely missing (fallback for dev environments)
    download_error = None
    with st.spinner("Downloading Knowledge Base (Dev Mode)..."):
        try:
            # Imported lazily so the dependency is only needed on this path.
            from huggingface_hub import hf_hub_download

            for index_file in ("index/bm25.pkl", "index/doc_map.pkl"):
                hf_hub_download(
                    repo_id="yuvis/enterprise-rag-index",
                    filename=index_file,
                    repo_type="dataset",
                    local_dir="data",
                )
        except Exception as exc:
            # Stay best-effort, but remember the failure so it can be shown
            # to the user instead of being silently discarded.
            download_error = exc

    if download_error is not None:
        st.warning(f"⚠️ Could not download the knowledge base index: {download_error}")

# Browser tab title, favicon and layout. The icon must be a real emoji
# character; the previous value was a mis-decoded byte sequence.
st.set_page_config(
    page_title="Enterprise RAG Search",
    page_icon="🔍",
    layout="wide",
)

# Initialize pipeline
@st.cache_resource
def _build_pipeline():
    """Prepare the data and construct the QueryPipeline (cached).

    Exceptions are deliberately allowed to propagate: only a successfully
    built pipeline should be stored in the resource cache, so that a
    failed attempt can be retried on the next call.
    """
    # Ensure data is ready before initializing pipeline
    prepare_data()
    return QueryPipeline()


def load_pipeline():
    """Load the RAG pipeline (cached for performance).

    Returns:
        The shared ``QueryPipeline`` instance, or ``None`` if it could not
        be created. On failure the error is rendered in the app; the
        failure itself is not cached, so a later call tries again.
    """
    try:
        return _build_pipeline()
    except Exception as e:
        st.error(f"Error loading pipeline: {e}")
        st.exception(e)
        return None

# Main UI: page heading and tagline (emoji restored from a mis-decoded byte sequence)
st.title("🔍 Enterprise RAG Search")
st.markdown("*Production-grade Retrieval-Augmented Generation with Hallucination Prevention*")

# Sidebar configuration: API-key status, retrieval/rerank sliders and a
# static system summary. The slider values are read later by the chat handler.
with st.sidebar:
    st.header("⚙️ Configuration")
    st.caption("🚀 Version: Pinecone V2")

    # Check for API key (only its presence is reported, never its value)
    groq_key = os.getenv("GROQ_API_KEY")
    if not groq_key:
        st.warning("⚠️ GROQ_API_KEY not set. Please configure in Space settings.")
    else:
        st.success("✅ API Key configured")

    st.divider()

    # Slider arguments are (label, min, max, default)
    top_k_retrieval = st.slider("Retrieval Top-K", 5, 50, 20)
    top_k_rerank = st.slider("Rerank Top-K", 1, 10, 5)

    st.divider()
    st.markdown("### 📊 System Info")
    st.info("""
    - **Hybrid Search**: BM25 + FAISS
    - **Reranking**: Cross-Encoder
    - **Safety**: Confidence Gating
    """)

# Make sure the conversation log exists for this browser session
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Re-render every stored turn so the conversation survives script reruns
for past_turn in st.session_state["messages"]:
    with st.chat_message(past_turn["role"]):
        st.markdown(past_turn["content"])

# Chat input
def _format_score(value):
    """Return *value* formatted with two decimals, or None if it is not numeric.

    Going through ``float()`` lets missing keys (``None``) and placeholder
    strings be detected instead of raising inside an f-string format spec.
    """
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError, OverflowError):
        return None


if prompt := st.chat_input("Ask a question about your documents..."):
    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    with st.chat_message("assistant"):
        with st.spinner("Searching and generating answer..."):
            pipeline = load_pipeline()

            if pipeline is None:
                st.error("Pipeline not loaded. Please check configuration.")
            else:
                try:
                    result = pipeline.run(
                        query=prompt,
                        top_k_retrieval=top_k_retrieval,
                        top_k_rerank=top_k_rerank
                    )

                    # Display answer
                    st.markdown(result["answer"])

                    # Display metadata in expander
                    with st.expander("📋 View Details"):
                        col1, col2, col3 = st.columns(3)

                        with col1:
                            # Always shown; falls back to "N/A" when the
                            # score is missing or not numeric.
                            retrieval_text = _format_score(result.get('retrieval_score'))
                            st.metric(
                                "Retrieval Score",
                                retrieval_text if retrieval_text is not None else "N/A",
                            )

                        with col2:
                            # Optional metric: only shown when a numeric value is present.
                            hallucination_text = _format_score(result.get('hallucination_score'))
                            if hallucination_text is not None:
                                st.metric("Hallucination Score", hallucination_text)

                        with col3:
                            # Optional metric: only shown when a numeric value is present.
                            groundedness_text = _format_score(result.get('groundedness'))
                            if groundedness_text is not None:
                                st.metric("Groundedness", groundedness_text)

                        # Show retrieved context (first three entries only)
                        # NOTE(review): assumes each entry is a (text, score) pair — confirm against QueryPipeline.run
                        if result.get("context"):
                            st.markdown("**Retrieved Context:**")
                            for i, (doc, score) in enumerate(result["context"][:3], 1):
                                st.markdown(f"{i}. [Score: {score:.2f}] {doc[:200]}...")

                    # Add to chat history
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": result["answer"]
                    })

                except Exception as e:
                    # Top-level UI boundary: surface the failure in the chat
                    # instead of crashing the script run.
                    st.error(f"Error generating response: {e}")
                    st.exception(e)

# Footer: a divider followed by a centered, muted line linking to the repository.
# Raw HTML is used for the styling, hence unsafe_allow_html below.
_FOOTER_HTML = """
<div style='text-align: center; color: gray; font-size: 0.8em;'>
    Enterprise RAG System | <a href='https://github.com/YuvrajSinghBhadoria2/Enterprise-RAG-System'>GitHub</a>
</div>
"""

st.divider()
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)