Spaces:
Sleeping
Sleeping
File size: 8,202 Bytes
26fe9a7 7f4636f 26fe9a7 267abd7 26fe9a7 81b1c13 26fe9a7 102f416 2859808 26fe9a7 102f416 26fe9a7 432f405 81b1c13 432f405 26fe9a7 2859808 26fe9a7 81b1c13 26fe9a7 911a4b5 d3a38ee 911a4b5 26fe9a7 d3a38ee 2859808 d3a38ee 2859808 d3a38ee 2859808 d3a38ee 2859808 d3a38ee 2859808 26fe9a7 2859808 26fe9a7 d3a38ee 26fe9a7 d3a38ee 26fe9a7 d3a38ee 26fe9a7 d3a38ee 26fe9a7 911a4b5 26fe9a7 d3a38ee 26fe9a7 473fd36 26fe9a7 9362424 c3e0a07 bb4f4c4 325cfd9 2fd74c2 8558835 325cfd9 c3e0a07 325cfd9 b8d80bf 325cfd9 b8d80bf 325cfd9 8367f0a 325cfd9 c3e0a07 d3a38ee 6c10370 a9e40c2 c3e0a07 325cfd9 8558835 d3a38ee 325cfd9 c3e0a07 325cfd9 8367f0a 8558835 c3e0a07 8367f0a 8558835 8367f0a 8558835 26fe9a7 562dec3 f2704bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
import sys
import os
from dotenv import load_dotenv
load_dotenv()
# Force rebuild trigger
# Add root to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
import gradio as gr
import shutil
import spaces
from services.rag.retrieve import get_retriever
from services.rag.rerank import get_reranker
from services.rag.generate import get_generator, run_local_generation
from services.rag.ingest import ingest
from services.rag.index import build_index
from services.observability.langfuse_client import observe
# Constants
# Filesystem locations. All are relative paths, so they resolve against the
# process working directory.
DATA_DIR = "data"
# Destination passed to ingest() and the source directory passed to build_index().
PROCESSED_DIR = os.path.join(DATA_DIR, "processed")
# Second argument to build_index() and the path handed to get_retriever().
INDEX_DIR = os.path.join(DATA_DIR, "index")
# Directory in which admin_ingest() looks for the bundled "sports_legends.txt".
SAMPLES_DIR = "samples"
# Global Singletons
# Assigned by init_services(); each stays None until its service has been loaded.
retriever = None
reranker = None
generator = None
def _load_service(factory, fallback, *args):
    """Return ``factory(*args)``, or ``fallback`` if the factory raises.

    A failure is reported on stdout with the same "Service init warning"
    prefix the module has always used; it is never propagated.
    """
    try:
        return factory(*args)
    except Exception as e:
        print(f"Service init warning: {e}")
        return fallback


def init_services():
    """(Re)load the module-level ``retriever``, ``reranker`` and ``generator``.

    Each service is loaded independently, so an error in one factory (for
    example an index directory that exists but cannot be loaded yet) does not
    prevent the others from being initialised. When a factory fails, the
    previously bound instance (or ``None``) is kept.

    The retriever is only attempted when ``INDEX_DIR`` exists on disk.
    Never raises; problems are printed as warnings.
    """
    global retriever, reranker, generator

    # Without an index directory there is nothing to load a retriever from.
    if os.path.exists(INDEX_DIR):
        retriever = _load_service(get_retriever, retriever, INDEX_DIR)

    # The reranker and generator factories take no index argument, so they
    # are loaded regardless of whether an index is present.
    reranker = _load_service(get_reranker, reranker)
    generator = _load_service(get_generator, generator)
# GPU-wrapped generation function
@spaces.GPU
def generate_response_gpu(message, context_chunks, backend):
    """Produce an answer with the local backend via ``run_local_generation``.

    The function is wrapped with ``spaces.GPU``. It forwards only plain data
    (the message and the context chunks) to the standalone generation
    function instead of a service instance. ``backend`` is accepted for
    call-site symmetry but is not used here.
    """
    local_answer = run_local_generation(message, context_chunks)
    return local_answer
@observe(name="chat_interaction")
def chat_fn(message, history, backend):
    """Answer ``message`` through retrieve -> rerank -> generate.

    ``history`` is accepted (the chat interface supplies it) but is not used;
    the query sent to the pipeline is the raw message. ``backend`` selects the
    generator: ``"local"`` goes through ``generate_response_gpu``, anything
    else through ``generator.generate``.

    Returns a markdown string containing the answer, an "Evidence" section
    listing the reranked chunks, and a footer with backend and elapsed time.
    Returns a short notice instead when no index is loaded or nothing was
    retrieved.
    """
    # Lazily (re)load services: the index may have been built after startup.
    if retriever is None:
        init_services()
        if retriever is None:
            return "System is not ready. Please go to '1. Knowledge Base' tab and ingest documents."

    import time

    # 0. Contextualize query (currently just the message itself)
    query = message
    started = time.time()

    # 1. Retrieve
    candidates = retriever.retrieve(query, top_k=10)
    if not candidates:
        return "No relevant documents found in index."

    # 2. Rerank
    top_chunks = reranker.rerank(query, candidates, top_k=5)

    # 3. Generate
    answer = (
        generate_response_gpu(query, top_chunks, backend)
        if backend == "local"
        else generator.generate(query, top_chunks, backend=backend)
    )

    # 4. Format output with evidence
    elapsed = time.time() - started

    evidence_parts = ["\n\n### Evidence\n"]
    for rank, chunk in enumerate(top_chunks, start=1):
        meta = chunk.get('metadata', {})
        score = chunk.get('rerank_score', chunk.get('score', 0))
        evidence_parts.append(f"**[{rank}] {meta.get('doc_id')}** (Score: {score:.2f})\n")
        # Show at most 150 characters of the chunk, flattened to one line.
        snippet = chunk['content'][:150].replace('\n', ' ')
        evidence_parts.append(f"> ...{snippet}...\n\n")
    sources_text = "".join(evidence_parts)

    return f"{answer}\n\n{sources_text}\n*(Backend: {backend} | Time: {elapsed:.2f}s)*"
def clear_knowledge_base():
    """Delete all processed documents and the index, then reset services.

    ``PROCESSED_DIR`` and ``INDEX_DIR`` are removed (if present) and recreated
    empty. Both the retriever cached inside ``services.rag.retrieve`` and this
    module's own ``retriever`` global are dropped before ``init_services()``
    runs, so a retriever built from the deleted index cannot keep serving
    answers if reloading from the now-empty index directory fails.

    Returns the status message shown in the UI.
    """
    global retriever

    # Wipe and recreate the data directories.
    for directory in (PROCESSED_DIR, INDEX_DIR):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    # Drop the instance cached by the retrieve module ...
    import services.rag.retrieve
    services.rag.retrieve._shared_retriever = None
    # ... and the one this module holds. init_services() keeps the old value
    # when loading fails, so it must be cleared explicitly here.
    retriever = None

    init_services()
    return "Knowledge Base Cleared. System is empty."
def admin_ingest(files, use_sample):
    """Ingest uploaded and/or sample documents and rebuild the index.

    This is a generator: every ``yield`` is a progress/status string for the
    UI. All user-visible messages, including early errors and the final
    result, are yielded, because the value of a generator's ``return``
    statement is not delivered to the output component.

    Args:
        files: Uploaded files, or ``None``/empty. Each item may be a path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``.
        use_sample: When truthy, also ingest ``sports_legends.txt`` from
            ``SAMPLES_DIR``.

    Ingestion is additive: only the temporary staging directory is wiped;
    ``PROCESSED_DIR`` and ``INDEX_DIR`` are kept and the index is rebuilt
    from everything in ``PROCESSED_DIR``. Errors raised by ingest, indexing
    or service reload are logged to the server console and reported in the
    status text rather than propagated.
    """
    # 1. Clean temp input ONLY (keep processed/index for additive ingestion)
    staging_dir = "temp_ingest"
    if os.path.exists(staging_dir):
        shutil.rmtree(staging_dir)
    os.makedirs(staging_dir)

    # Ensure processed/index dirs exist
    os.makedirs(PROCESSED_DIR, exist_ok=True)
    os.makedirs(INDEX_DIR, exist_ok=True)

    status = "Starting processing...\n"

    # Handle source selection
    files_found = False

    if use_sample:
        sample_file = os.path.join(SAMPLES_DIR, "sports_legends.txt")
        if not os.path.exists(sample_file):
            yield "Error: Sample data not found on server."
            return
        shutil.copy(sample_file, staging_dir)
        status += "Loaded: Sports Legends Dataset\n"
        files_found = True

    if files:
        for upload in files:
            # Accept both plain paths and file-like wrappers carrying `.name`.
            source_path = upload if isinstance(upload, (str, os.PathLike)) else upload.name
            shutil.copy(source_path, staging_dir)
        status += f"Loaded: {len(files)} new files.\n"
        files_found = True

    if not files_found:
        yield "No new files selected. Select files or sample data."
        return

    yield status

    # Run ingest
    try:
        # Ingest new files into PROCESSED_DIR (additive)
        ingest(staging_dir, PROCESSED_DIR)
        status += "Processing new files complete.\nRebuilding Index...\n"
        yield status

        # Build index (scans ALL files in PROCESSED_DIR)
        build_index(PROCESSED_DIR, INDEX_DIR)
        status += "Index rebuilt with all documents.\nReloading services...\n"
        yield status

        # Force reload: drop the cached retriever so init_services() rebuilds it
        import services.rag.retrieve
        services.rag.retrieve._shared_retriever = None
        init_services()

        status += "Services reloaded. Knowledge Base Updated successfully!"
    except Exception as e:
        print(f"Ingestion Failed: {e}")  # Print to server logs
        import traceback
        traceback.print_exc()
        status += f"Error: {e}"

    # Final status (success or error) must be yielded to reach the UI.
    yield status
# Initialize on module load
init_services()

# UI layout: a narrow control sidebar on the left, the chat on the right.
with gr.Blocks(title="RAG Knowledge Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# RAG Knowledge Assistant")

    with gr.Row():
        # Left Column: Sidebar (Controls & Guide)
        with gr.Column(scale=1, variant="panel"):
            # Knowledge-base inputs and actions
            with gr.Group():
                file_upload = gr.File(
                    label="Document Upload",
                    file_count="multiple",
                    file_types=[".pdf", ".txt", ".html"],
                    height=70,
                )
                gr.HTML("<div style='text-align: center; color: #666; font-size: 11px; margin: 2px 0;'>— OR —</div>")
                use_sample_chk = gr.Checkbox(
                    label="Use Sports Legends Sample Dataset",
                    container=False,
                )
                ingest_btn = gr.Button("Process Documents", variant="primary", size="sm")
                clear_btn = gr.Button("Clear Knowledge Base", variant="stop", size="sm")

            # Status Log - Visible by default
            with gr.Accordion("System Logs", open=True):
                status_box = gr.Textbox(
                    show_label=False,
                    value="System Ready.",
                    interactive=False,
                    lines=4,
                    max_lines=10,
                    text_align="left",
                )

            # Both actions report into the status box.
            ingest_btn.click(
                admin_ingest,
                inputs=[file_upload, use_sample_chk],
                outputs=[status_box],
            )
            clear_btn.click(
                clear_knowledge_base,
                outputs=[status_box],
            )

            # Backend selector; passed to chat_fn as an additional input.
            with gr.Group():
                backend_radio = gr.Radio(
                    choices=["openai", "gemini", "local"],
                    value="openai",
                    label="LLM Backend",
                    container=False,
                )
                gr.HTML("<div style='font-size: 9px; color: #888; margin-top: 2px;'>*Local = Mistral-7B (ZeroGPU)</div>")

        # Right Column: Main App (Chat)
        with gr.Column(scale=4):
            chatbot = gr.ChatInterface(
                fn=chat_fn,
                additional_inputs=[backend_radio],
                title="Chat Interface",
                description="Ask questions about your uploaded documents or the sample dataset.",
                # Each example is [message, backend].
                examples=[
                    ["Who is the greatest quarterback?", "openai"],
                    ["Summarize the uploaded documents", "local"],
                    ["What makes Lionel Messi a legend?", "gemini"],
                ],
            )
if __name__ == "__main__":
    # Enable the request queue, then serve. A specific server_name is needed
    # for Docker/Spaces.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0")
|