Spaces:
Running
Running
Upload 14 files
Browse files- .env +117 -0
- Dockerfile +51 -0
- app.py +339 -0
- assets/users.csv +12 -0
- chunker.py +135 -0
- config.py +83 -0
- note.txt +7 -0
- postman.json +153 -0
- rag_components.py +357 -0
- rag_system.py +70 -0
- requirements.txt +16 -0
- sources/vehicle.csv +262 -0
- templates/chat-bot.html +480 -0
- utils.py +150 -0
.env
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================
|
| 2 |
+
# RAG System — Environment Configuration
|
| 3 |
+
# ============================================================
|
| 4 |
+
|
| 5 |
+
# --- API Credentials (for /webhook/search — used by n8n) ---
|
| 6 |
+
API_USERNAME=admin
|
| 7 |
+
API_PASSWORD=12345
|
| 8 |
+
|
| 9 |
+
# --- Admin Dashboard Credentials (fallback if users.csv missing) ---
|
| 10 |
+
FLASK_ADMIN_USERNAME=admin
|
| 11 |
+
FLASK_ADMIN_PASSWORD=1234
|
| 12 |
+
|
| 13 |
+
# ============================================================
|
| 14 |
+
# RAG — Storage & Sources
|
| 15 |
+
# ============================================================
|
| 16 |
+
|
| 17 |
+
# Absolute or relative path to the folder containing source documents
|
| 18 |
+
SOURCES_DIR=sources
|
| 19 |
+
|
| 20 |
+
# Absolute or relative path where the FAISS index will be stored
|
| 21 |
+
RAG_STORAGE_DIR=faiss_storage
|
| 22 |
+
|
| 23 |
+
# ============================================================
|
| 24 |
+
# RAG — Embedding Model
|
| 25 |
+
# ============================================================
|
| 26 |
+
|
| 27 |
+
RAG_EMBEDDING_MODEL=BAAI/bge-small-en
|
| 28 |
+
|
| 29 |
+
# Set to True to use CUDA GPU for embeddings (requires CUDA)
|
| 30 |
+
RAG_EMBEDDING_GPU=False
|
| 31 |
+
|
| 32 |
+
# Load existing FAISS index from disk on startup (True/False)
|
| 33 |
+
RAG_LOAD_INDEX=True
|
| 34 |
+
|
| 35 |
+
# ============================================================
|
| 36 |
+
# RAG — Chunking
|
| 37 |
+
# ============================================================
|
| 38 |
+
|
| 39 |
+
RAG_CHUNK_SIZE=2000
|
| 40 |
+
RAG_CHUNK_OVERLAP=150
|
| 41 |
+
|
| 42 |
+
# ============================================================
|
| 43 |
+
# RAG — Retrieval
|
| 44 |
+
# ============================================================
|
| 45 |
+
|
| 46 |
+
# How many docs to fetch from FAISS before reranking
|
| 47 |
+
RAG_INITIAL_FETCH_K=20
|
| 48 |
+
|
| 49 |
+
# How many docs to return after reranking (final result count)
|
| 50 |
+
RAG_RERANKER_K=5
|
| 51 |
+
|
| 52 |
+
# Max files to process in a single incremental index update
|
| 53 |
+
RAG_MAX_FILES_FOR_INCREMENTAL=50
|
| 54 |
+
|
| 55 |
+
# ============================================================
|
| 56 |
+
# CSV Specific Parameters
|
| 57 |
+
# ============================================================
|
| 58 |
+
|
| 59 |
+
# Maximum number of results to return from CSV sources
|
| 60 |
+
RAG_CSV_MAX_RESULTS=50
|
| 61 |
+
|
| 62 |
+
# Threshold for CSV match confidence
|
| 63 |
+
# (If reranker is enabled, higher is better e.g., 0.0 to 1.0)
|
| 64 |
+
# (If reranker is disabled, app.py converts the FAISS L2 distance to 1 / (1 + distance), so higher is still better, 0.0 to 1.0)
|
| 65 |
+
RAG_CSV_CONFIDENCE_THRESHOLD=0.1
|
| 66 |
+
|
| 67 |
+
# ============================================================
|
| 68 |
+
# RAG — Reranker Model
|
| 69 |
+
# ============================================================
|
| 70 |
+
|
| 71 |
+
RAG_RERANKER_MODEL=jinaai/jina-reranker-v2-base-multilingual
|
| 72 |
+
|
| 73 |
+
# Set to False to disable reranking entirely
|
| 74 |
+
RAG_RERANKER_ENABLED=False
|
| 75 |
+
|
| 76 |
+
# ============================================================
|
| 77 |
+
# URL Source (General URL / Rentry)
|
| 78 |
+
# ============================================================
|
| 79 |
+
|
| 80 |
+
# Toggle fetching from external URL completely on/off
|
| 81 |
+
URL_FETCH_ENABLED=False
|
| 82 |
+
|
| 83 |
+
# General URL to scrape (replacing RENTRY_URL)
|
| 84 |
+
EXTERNAL_URL="https://rentry.co/ada"
|
| 85 |
+
|
| 86 |
+
# How often (in minutes) to auto-refresh from the URL (0 = disabled)
|
| 87 |
+
URL_UPDATE_PERIOD_MINUTES=60
|
| 88 |
+
|
| 89 |
+
# ============================================================
|
| 90 |
+
# Google Drive — Source Documents Folder
|
| 91 |
+
# ============================================================
|
| 92 |
+
|
| 93 |
+
# Set to True to download source docs from a GDrive folder on startup
|
| 94 |
+
GDRIVE_SOURCES_ENABLED=False
|
| 95 |
+
GDRIVE_FOLDER_URL=
|
| 96 |
+
|
| 97 |
+
# ============================================================
|
| 98 |
+
# Google Drive — Pre-built FAISS Index (ZIP)
|
| 99 |
+
# ============================================================
|
| 100 |
+
|
| 101 |
+
# Set to True to download a pre-built FAISS index ZIP from GDrive on startup
|
| 102 |
+
GDRIVE_INDEX_ENABLED=False
|
| 103 |
+
GDRIVE_INDEX_URL=
|
| 104 |
+
|
| 105 |
+
# ============================================================
|
| 106 |
+
# Google Drive — Users CSV
|
| 107 |
+
# ============================================================
|
| 108 |
+
|
| 109 |
+
# Set to True to download users.csv from GDrive on startup
|
| 110 |
+
GDRIVE_USERS_CSV_ENABLED=False
|
| 111 |
+
GDRIVE_USERS_CSV_URL=
|
| 112 |
+
|
| 113 |
+
# ============================================================
|
| 114 |
+
# Logging
|
| 115 |
+
# ============================================================
|
| 116 |
+
|
| 117 |
+
RAG_DETAILED_LOGGING=True
|
Dockerfile
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.11 to support Pandas 3.x and newer libraries
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies
# libgl1 and libglib2.0-0 are often needed for CV/PDF libraries
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file on its own so the dependency layer below stays
# cached when only application code changes
COPY requirements.txt requirements.txt

# Install Python packages with timeout increase
RUN pip install --no-cache-dir --timeout=1000 -r requirements.txt

# Create a non-root user (Security Best Practice), give it the app directory,
# and create the HuggingFace/Torch cache directories it must be able to write to.
# /app only holds requirements.txt at this point, so the recursive chown is cheap.
RUN useradd -m -u 1000 user && \
    mkdir -p /tmp/transformers_cache /tmp/hf_home /tmp/torch_home && \
    chown -R user:user /app /tmp/transformers_cache /tmp/hf_home /tmp/torch_home

# Copy application code, already owned by the non-root user.
# COPY --chown avoids the duplicate full-size layer that a later
# `RUN chown -R user:user /app` would create.
# NOTE(review): `COPY .` also copies .env (API/admin credentials) into the image
# unless a .dockerignore excludes it — confirm this is intended.
COPY --chown=user:user . /app

# Switch to the non-root user
USER user

# Expose the port (Standard for HF Spaces)
EXPOSE 7860

# Set environment variables
ENV FLASK_HOST=0.0.0.0
ENV FLASK_PORT=7860
ENV FLASK_DEBUG=False

# CRITICAL: Set HF-specific env vars to writable directories
ENV TRANSFORMERS_CACHE=/tmp/transformers_cache
ENV HF_HOME=/tmp/hf_home
ENV TORCH_HOME=/tmp/torch_home

# Command to run the app
CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, Response, render_template
|
| 2 |
+
from flask_cors import CORS
|
| 3 |
+
import os
|
| 4 |
+
import logging
|
| 5 |
+
import functools
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import threading
|
| 8 |
+
import time
|
| 9 |
+
import tempfile
|
| 10 |
+
import shutil
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Load environment variables
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
# Custom Imports
|
| 17 |
+
from rag_system import initialize_and_get_rag_system
|
| 18 |
+
from config import (
|
| 19 |
+
API_USERNAME, API_PASSWORD, RAG_SOURCES_DIR, RAG_STORAGE_PARENT_DIR,
|
| 20 |
+
GDRIVE_INDEX_ENABLED, GDRIVE_INDEX_ID_OR_URL,
|
| 21 |
+
GDRIVE_USERS_CSV_ENABLED, GDRIVE_USERS_CSV_ID_OR_URL,
|
| 22 |
+
ADMIN_USERNAME, ADMIN_PASSWORD, RAG_RERANKER_K,
|
| 23 |
+
EXTERNAL_URL, URL_UPDATE_PERIOD_MINUTES, URL_FETCH_ENABLED,
|
| 24 |
+
RAG_CSV_MAX_RESULTS, RAG_CSV_CONFIDENCE_THRESHOLD
|
| 25 |
+
)
|
| 26 |
+
from utils import download_and_unzip_gdrive_file, download_gdrive_file, fetch_and_clean_url
|
| 27 |
+
|
| 28 |
+
# Logging Setup
|
| 29 |
+
# Configure the root logger once at import time; the module logger below uses it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask Init
app = Flask(__name__, static_folder='static', template_folder='templates')
# NOTE(review): CORS(app) is called with no restrictions configured here —
# confirm that allowing cross-origin requests from any origin is intended.
CORS(app)

# Global State
# Live RAG instance: assigned by run_startup_tasks() and replaced by the
# rebuild / URL-update paths; None until initialization succeeds.
rag_system = None
# Users table loaded by load_users_from_csv(); None when users.csv is missing or unreadable.
user_df = None
# Absolute directory containing this file; used to locate assets/ and tmp/.
_APP_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 40 |
+
|
| 41 |
+
# --- Helper: Load Users ---
|
| 42 |
+
def load_users_from_csv():
    """Load ``assets/users.csv`` into the module-level ``user_df``.

    The ``assets`` folder is created if it does not exist. When the CSV is
    present it is read into a DataFrame and, if an ``email`` column exists,
    the addresses are lower-cased and stripped so lookups can use a
    normalized key. ``user_df`` is set to ``None`` when the file is missing
    or cannot be loaded.
    """
    global user_df
    assets_folder = os.path.join(_APP_BASE_DIR, 'assets')
    os.makedirs(assets_folder, exist_ok=True)
    users_csv_path = os.path.join(assets_folder, 'users.csv')

    try:
        if not os.path.exists(users_csv_path):
            logger.warning("users.csv not found in assets folder.")
            user_df = None
            return

        # Read passwords as text. With default dtype inference a column of
        # numeric-looking passwords becomes integers, so "00123" would be
        # stored as 123 and could never match what the user types.
        loaded = pd.read_csv(users_csv_path, dtype={'password': str})

        # Normalize email
        if 'email' in loaded.columns:
            loaded['email'] = loaded['email'].str.lower().str.strip()

        user_df = loaded
        logger.info(f"Loaded {len(user_df)} users from CSV.")
    except Exception as e:
        # Best-effort load: any read/parse problem leaves the app without a
        # users table rather than preventing startup.
        logger.error(f"Failed to load users.csv: {e}")
        user_df = None
|
| 61 |
+
|
| 62 |
+
# --- Helper: Auth Decorators ---
|
| 63 |
+
def require_api_auth(f):
    """Protects the N8N Webhook endpoint

    Wraps a view so it only runs when the request carries HTTP Basic
    credentials equal to ``API_USERNAME`` / ``API_PASSWORD``. Otherwise a
    401 response with a ``WWW-Authenticate`` challenge is returned.
    """
    import hmac  # function-scope import keeps this block self-contained

    def _same(supplied, expected):
        # Constant-time comparison so response timing does not reveal how much
        # of a credential matched. Encode to bytes because compare_digest
        # rejects non-ASCII str arguments.
        if supplied is None or expected is None:
            return False
        return hmac.compare_digest(
            str(supplied).encode("utf-8"), str(expected).encode("utf-8")
        )

    @functools.wraps(f)
    def decorated(*args, **kwargs):
        auth = request.authorization
        # Evaluate both comparisons unconditionally (no short-circuit).
        user_ok = _same(auth.username, API_USERNAME) if auth else False
        pass_ok = _same(auth.password, API_PASSWORD) if auth else False
        if not (user_ok and pass_ok):
            return Response('Unauthorized', 401, {'WWW-Authenticate': 'Basic realm="API Login Required"'})
        return f(*args, **kwargs)
    return decorated
|
| 72 |
+
|
| 73 |
+
def require_admin_auth(f):
    """Protects Admin Rebuild/Update endpoints

    A request is authorized when its HTTP Basic credentials match either

    * a row of ``user_df`` whose normalized email equals the username, whose
      password equals the supplied password and whose role is ``'admin'``, or
    * the fallback ``ADMIN_USERNAME`` / ``ADMIN_PASSWORD`` pair.

    Any other request receives a JSON 401 response.
    """
    import hmac  # function-scope import keeps this block self-contained

    def _same(supplied, expected):
        # Constant-time comparison; bytes because compare_digest rejects
        # non-ASCII str arguments.
        if supplied is None or expected is None:
            return False
        return hmac.compare_digest(
            str(supplied).encode("utf-8"), str(expected).encode("utf-8")
        )

    def _is_csv_admin(auth):
        # A users table without the expected columns cannot authorize anyone;
        # fall through to the fallback credentials instead of raising KeyError.
        if user_df is None or not {'email', 'password', 'role'}.issubset(user_df.columns):
            return False
        user_email = (auth.username or "").lower().strip()
        user_record = user_df[user_df['email'] == user_email]
        if user_record.empty:
            return False
        user_data = user_record.iloc[0]
        stored_password = user_data['password']
        # An empty CSV cell is read as NaN; str(NaN) == "nan" must never act
        # as a valid password.
        if pd.isna(stored_password):
            return False
        return _same(auth.password, stored_password) and bool(user_data['role'] == 'admin')

    @functools.wraps(f)
    def decorated(*args, **kwargs):
        auth = request.authorization
        if not auth:
            return jsonify({"error": "Unauthorized"}), 401

        if _is_csv_admin(auth):
            return f(*args, **kwargs)

        # Fallback admin account configured through the environment.
        user_ok = _same(auth.username, ADMIN_USERNAME)
        pass_ok = _same(auth.password, ADMIN_PASSWORD)
        if user_ok and pass_ok:
            return f(*args, **kwargs)

        return jsonify({"error": "Unauthorized"}), 401
    return decorated
|
| 94 |
+
|
| 95 |
+
# --- URL Zero-Downtime Updater ---
|
| 96 |
+
def trigger_url_update():
    """Rebuild the RAG index from local sources plus the external URL and swap it in.

    The rebuild happens in temporary staging directories so the live
    ``rag_system`` keeps serving until the replacement is complete.

    Returns:
        dict: ``{"status": "success", "message": ...}`` on success, or
        ``{"error": <reason>}`` when fetching is disabled/unconfigured, the
        URL could not be fetched, or any step raised.
    """
    global rag_system
    if not URL_FETCH_ENABLED or not EXTERNAL_URL:
        return {"error": "External URL fetching is disabled or not configured"}

    logger.info(f"[URL_UPDATE] Starting zero-downtime fetch from {EXTERNAL_URL}")

    # 1. Create temporary staging folders
    temp_staging_sources = tempfile.mkdtemp(prefix="rag_sources_temp_")
    temp_index = tempfile.mkdtemp(prefix="rag_index_temp_")

    try:
        # 2. COMBINE SOURCES: Copy existing GDrive/Local sources to staging first
        if os.path.exists(RAG_SOURCES_DIR):
            shutil.copytree(RAG_SOURCES_DIR, temp_staging_sources, dirs_exist_ok=True)

        # 3. Fetch URL data — saved to <app_root>/tmp/ for persistence & inspection
        tmp_dir = os.path.join(_APP_BASE_DIR, 'tmp')
        os.makedirs(tmp_dir, exist_ok=True)
        url_out_path = os.path.join(tmp_dir, "url_data.txt")
        success = fetch_and_clean_url(EXTERNAL_URL, url_out_path)

        if not success:
            return {"error": "Failed to fetch or parse the URL."}

        # Copy from tmp/ into staging so it gets indexed alongside other sources
        shutil.copy2(url_out_path, os.path.join(temp_staging_sources, "url_data.txt"))

        # 4. Build a brand new RAG instance isolated in the temp directories
        new_rag = initialize_and_get_rag_system(
            force_rebuild=True,
            source_dir_override=temp_staging_sources,
            storage_dir_override=temp_index
        )

        if new_rag is None:
            raise Exception("Failed to build new RAG index from parsed text.")

        # 5. Persist the new INDEX directory and repoint the new instance at it
        #    BEFORE publishing it. If the copy fails, the previous system stays
        #    live and the error returned below is accurate; the published
        #    instance never references a temp dir that `finally` removes.
        os.makedirs(RAG_STORAGE_PARENT_DIR, exist_ok=True)
        shutil.copytree(temp_index, RAG_STORAGE_PARENT_DIR, dirs_exist_ok=True)
        new_rag.index_storage_dir = RAG_STORAGE_PARENT_DIR

        # 6. Swap (a single global rebind; incoming requests hit the new DB immediately)
        rag_system = new_rag

        logger.info("[URL_UPDATE] Success! RAG database updated combining Local, GDrive, and URL sources.")
        return {"status": "success", "message": "Database successfully updated using combined sources."}

    except Exception as e:
        logger.error(f"[URL_UPDATE] Error during update: {e}", exc_info=True)
        return {"error": str(e)}

    finally:
        # Staging folders are scratch space; remove them on every exit path.
        shutil.rmtree(temp_staging_sources, ignore_errors=True)
        shutil.rmtree(temp_index, ignore_errors=True)
|
| 153 |
+
|
| 154 |
+
def url_periodic_loop():
    """Run one URL update immediately, then repeat it on a fixed interval forever.

    Returns straight away (after logging) when URL fetching is disabled, no
    URL is configured, or the configured period is not positive. Otherwise it
    never returns; it is meant to be the target of a daemon thread.
    """
    updates_enabled = URL_FETCH_ENABLED and EXTERNAL_URL and URL_UPDATE_PERIOD_MINUTES > 0
    if not updates_enabled:
        logger.info("Periodic URL updates disabled.")
        return

    logger.info(f"[URL_UPDATE] Background thread started for: {EXTERNAL_URL}")

    # Initial refresh, performed before the first wait.
    trigger_url_update()

    pause_seconds = URL_UPDATE_PERIOD_MINUTES * 60
    while True:
        time.sleep(pause_seconds)
        logger.info("[URL_UPDATE] Triggering scheduled periodic update...")
        trigger_url_update()
|
| 166 |
+
|
| 167 |
+
# --- Startup Logic ---
|
| 168 |
+
def run_startup_tasks():
    """Prepare users, index and background refresh when the module is imported.

    In order: optionally download ``users.csv`` from Google Drive, load the
    users table, optionally download and unzip a pre-built index into the
    current working directory, initialize the global ``rag_system``, and
    start the periodic URL-update daemon thread when URL fetching is
    enabled and configured.
    """
    global rag_system
    logger.info("--- Executing Startup Tasks ---")

    wants_remote_users = GDRIVE_USERS_CSV_ENABLED and GDRIVE_USERS_CSV_ID_OR_URL
    if wants_remote_users:
        users_csv_target = os.path.join(_APP_BASE_DIR, 'assets', 'users.csv')
        download_gdrive_file(GDRIVE_USERS_CSV_ID_OR_URL, users_csv_target)

    load_users_from_csv()

    wants_remote_index = GDRIVE_INDEX_ENABLED and GDRIVE_INDEX_ID_OR_URL
    if wants_remote_index:
        unzip_destination = os.getcwd()
        download_and_unzip_gdrive_file(GDRIVE_INDEX_ID_OR_URL, unzip_destination)

    rag_system = initialize_and_get_rag_system()

    if URL_FETCH_ENABLED and EXTERNAL_URL:
        # Daemon thread: it must not keep the process alive on shutdown.
        refresher = threading.Thread(target=url_periodic_loop, daemon=True)
        refresher.start()

    logger.info("--- Startup Tasks Complete ---")


# Executed at import time, inside an application context.
with app.app_context():
    run_startup_tasks()
|
| 190 |
+
|
| 191 |
+
# ===========================
|
| 192 |
+
# API ROUTES
|
| 193 |
+
# ===========================
|
| 194 |
+
|
| 195 |
+
@app.route('/webhook/search', methods=['POST'])
@require_api_auth
def search_knowledgebase_api():
    """Search the knowledge base and return the filtered results as JSON.

    Request JSON fields:
        query (required): search text.
        final_k: number of results requested (defaults to ``RAG_RERANKER_K``).
        use_reranker: when false, the retriever's reranker is detached for
            the search (defaults to True).
        cleaned: when true, each result is reduced to its ``content`` field
            (defaults to False).

    CSV-sourced results are kept only while their confidence is at least
    ``RAG_CSV_CONFIDENCE_THRESHOLD`` and fewer than ``RAG_CSV_MAX_RESULTS``
    of them have been accepted; all other results pass through unchanged.

    Returns 503 when the RAG system is not initialized, 400 when ``query``
    is missing, 500 with the error text when the search raises.
    """
    if not rag_system:
        return jsonify({"error": "RAG not initialized. Check server logs."}), 503

    # silent=True: a missing/invalid JSON body is treated as an empty request
    # and reported through the "Query field is required" error below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get('query')
    if not query:
        return jsonify({"error": "Query field is required"}), 400

    top_k = data.get('final_k', RAG_RERANKER_K)
    use_reranker = data.get('use_reranker', True)

    # 1. Extract the 'cleaned' parameter (defaults to False)
    cleaned = data.get('cleaned', False)

    # NOTE(review): this mutates the shared retriever per request; concurrent
    # requests with different `use_reranker` values can affect each other.
    if rag_system.retriever:
        if not use_reranker:
            rag_system.retriever.reranker = None
        elif rag_system.reranker:
            rag_system.retriever.reranker = rag_system.reranker

    # Scores are reranker confidences only when the reranker was both
    # requested and available for THIS search. Checking rag_system.reranker
    # alone mis-reads distances as confidences when the caller disables it.
    reranker_active = bool(use_reranker and rag_system.reranker)

    try:
        raw_results = rag_system.search_knowledge_base(query, top_k=top_k)

        # Apply CSV limitations and thresholds
        final_results = []
        csv_count = 0

        for res in raw_results:
            metadata = res["metadata"]
            is_csv = (
                metadata.get("source_type") == "csv"
                or metadata.get("source_document_name", "").endswith(".csv")
            )

            if not is_csv:
                final_results.append(res)
                continue

            score = res["score"]

            # Confidence depends on the method used (reranker: higher is better | FAISS L2: lower is better)
            if reranker_active:
                confidence = score
            else:
                # Convert FAISS L2 Distance into a 0-1 Confidence Score
                confidence = 1 / (1 + score)
                res["score"] = confidence  # Update the result so the API shows the neat confidence score

            if confidence >= RAG_CSV_CONFIDENCE_THRESHOLD and csv_count < RAG_CSV_MAX_RESULTS:
                final_results.append(res)
                csv_count += 1

        # 2. If cleaned is True, strip out 'metadata' and 'score'
        if cleaned:
            final_results = [{"content": r["content"]} for r in final_results]

        return jsonify({"results": final_results, "count": len(final_results), "status": "success"})
    except Exception as e:
        logger.error(f"Search API Error: {e}", exc_info=True)
        return jsonify({"error": str(e)}), 500
|
| 256 |
+
|
| 257 |
+
@app.route('/user-login', methods=['POST'])
def user_login():
    """Check an email/password pair against the users table.

    Expects a JSON object with ``email`` and ``password``. On success the
    matching row is returned as JSON with the ``password`` field removed.

    Returns 503 when the users table is unavailable, 400 when either field
    is missing, 401 when the credentials do not match.
    """
    import hmac  # function-scope import keeps this block self-contained

    if user_df is None or not {'email', 'password'}.issubset(user_df.columns):
        # A table without these columns cannot authenticate anyone.
        return jsonify({"error": "User database not available."}), 503

    # silent=True: a missing or malformed body becomes an empty request and is
    # answered with the 400 below instead of crashing on `None.get`.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_email = data.get('email', '')
    # A non-string email (number, list, null) is treated as missing.
    email = raw_email.lower().strip() if isinstance(raw_email, str) else ''
    password = data.get('password')

    if not email or not password:
        return jsonify({"error": "Email and password required"}), 400

    user_record = user_df[user_df['email'] == email]
    if not user_record.empty:
        u_data = user_record.iloc[0]
        stored_password = u_data['password']
        # An empty CSV cell is read as NaN; str(NaN) == "nan" must never act
        # as a valid password.
        if not pd.isna(stored_password) and hmac.compare_digest(
            str(stored_password).encode("utf-8"), str(password).encode("utf-8")
        ):
            resp = u_data.to_dict()
            resp.pop('password', None)
            return jsonify(resp), 200

    return jsonify({"error": "Invalid credentials"}), 401
|
| 279 |
+
|
| 280 |
+
@app.route('/')
def index_route():
    """Serve the chat-bot page rendered from the ``chat-bot.html`` template."""
    page = render_template('chat-bot.html')
    return page
|
| 283 |
+
|
| 284 |
+
@app.route('/admin/login', methods=['POST'])
@require_admin_auth
def admin_login():
    """Confirm admin credentials; the decorator does the actual check."""
    payload = {"status": "success", "message": "Authenticated"}
    return jsonify(payload), 200
|
| 288 |
+
|
| 289 |
+
@app.route('/admin/update_faiss_index', methods=['POST'])
@require_admin_auth
def update_faiss_index():
    """Incrementally add new files from ``RAG_SOURCES_DIR`` to the live index.

    Optional JSON field ``max_new_files`` is passed through to
    ``rag_system.update_index_with_new_files``.

    Returns 503 when the RAG system is not initialized, 500 with the error
    text when the update raises, otherwise 200 with the update result.
    """
    if not rag_system:
        return jsonify({"error": "RAG system not initialized"}), 503

    # The body is optional: silent=True lets a POST without a JSON body (or
    # without a JSON content type) proceed with the default instead of being
    # rejected before the handler logic runs.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    max_files = data.get('max_new_files')

    try:
        result = rag_system.update_index_with_new_files(RAG_SOURCES_DIR, max_files)
        return jsonify(result), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
| 303 |
+
|
| 304 |
+
@app.route('/admin/rebuild_index', methods=['POST'])
@require_admin_auth
def rebuild_index():
    """Force a full rebuild of the RAG index.

    When URL fetching is enabled and configured, the rebuild goes through
    ``trigger_url_update()`` so the URL content is included. Otherwise the
    system is re-initialized with ``force_rebuild=True``.

    Returns 200 on success, 500 with an ``error`` field on failure.
    """
    global rag_system
    try:
        if URL_FETCH_ENABLED and EXTERNAL_URL:
            result = trigger_url_update()
            if "error" in result:
                return jsonify(result), 500
            return jsonify({"status": "Index rebuilt successfully using combined local & URL sources"}), 200

        rebuilt = initialize_and_get_rag_system(force_rebuild=True)
        if rebuilt is None:
            # Keep the currently loaded system instead of replacing it with
            # None and reporting success.
            return jsonify({"error": "Failed to rebuild RAG index."}), 500
        rag_system = rebuilt
        return jsonify({"status": "Index rebuilt successfully"}), 200
    except Exception as e:
        logger.error(f"Rebuild index error: {e}", exc_info=True)
        return jsonify({"error": str(e)}), 500
|
| 319 |
+
|
| 320 |
+
# Retained specific endpoint name to ensure the frontend doesn't break
|
| 321 |
+
@app.route('/admin/fetch_rentry', methods=['POST'])
@require_admin_auth
def api_fetch_url():
    """Run a URL update on demand and relay its result.

    Responds 500 when the result dict contains an ``error`` key, else 200.
    """
    outcome = trigger_url_update()
    http_status = 500 if "error" in outcome else 200
    return jsonify(outcome), http_status
|
| 328 |
+
|
| 329 |
+
@app.route('/status', methods=['GET'])
def status_route():
    """Report liveness plus whether the RAG system and users table are loaded."""
    rag_ready = rag_system is not None
    users_ready = user_df is not None
    report = {
        "status": "online",
        "rag_initialized": rag_ready,
        "users_loaded": users_ready,
    }
    return jsonify(report)
|
| 336 |
+
|
| 337 |
+
if __name__ == '__main__':
    # PORT keeps precedence; FLASK_PORT / FLASK_HOST are the variables the
    # container image exports, so honor them as fallbacks instead of ignoring
    # them. `or` (rather than a .get default) also skips variables that are
    # set but empty, which would otherwise crash int().
    port = int(os.environ.get("PORT") or os.environ.get("FLASK_PORT") or 7860)
    host = os.environ.get("FLASK_HOST") or '0.0.0.0'
    app.run(host=host, port=port)
|
assets/users.csv
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sl,name,email,password,role
|
| 2 |
+
1,Sifat Hossain Fahim,fahim@ge-bd.com,WorldTour1234!,admin
|
| 3 |
+
2,Sakib Ahmed,sakib.ahmed@ge-bd.com,12345678!,admin
|
| 4 |
+
3,Rezwanul Islam,rezwanul@ge-bd.com,marstour1234!,admin
|
| 5 |
+
4,Sarwar Jahan,sarwar.piel@ge-bd.com,password123,user
|
| 6 |
+
5,Rezaul Kabir,rezaul.kabir@ge-bd.com,securepass,user
|
| 7 |
+
6,Test,test@test.com,12345678!,user
|
| 8 |
+
7,Sadiquzzaman,sadiquzzaman@ge-bd.com,wqeqw1234,user
|
| 9 |
+
8,Sadman,sadman@ge-bd.com,1234fvb,user
|
| 10 |
+
9,Pavel,pavel@ge-bd.com,12314rdf,user
|
| 11 |
+
10,Sajib,sajib.hossain@ge-bd.com,1234rge,user
|
| 12 |
+
11,Abdur Rahim,arahim@ge-bd.com,23ree4rt,user
|
chunker.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
import json
|
| 4 |
+
import argparse
|
| 5 |
+
import csv
|
| 6 |
+
from typing import List, Dict, Optional
|
| 7 |
+
|
| 8 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
+
|
| 10 |
+
from utils import extract_text_from_file, FAISS_RAG_SUPPORTED_EXTENSIONS
|
| 11 |
+
|
| 12 |
+
# --- Logging Setup ---
|
| 13 |
+
# Configure root logging at import time: INFO level, timestamped records,
# emitted through a single StreamHandler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler()
    ]
)
# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
def process_sources_and_create_chunks(
    sources_dir: str,
    output_file: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 150,
    text_output_dir: Optional[str] = None
) -> None:
    """Chunk every supported file in ``sources_dir`` and write the chunks as JSON.

    CSV files are read row by row, one chunk per non-empty row, rendered as
    ``column: value`` lines. Every other supported file is converted to text
    by the extractor registered for its extension in
    ``FAISS_RAG_SUPPORTED_EXTENSIONS`` and split with
    ``RecursiveCharacterTextSplitter``.

    Args:
        sources_dir: Directory scanned (non-recursively) for source files.
        output_file: Path of the JSON file that receives the list of chunks.
        chunk_size: Maximum chunk size passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.
        text_output_dir: When given, the raw extracted text of each non-CSV
            file is also saved there as ``<filename>.txt``.

    Raises:
        FileNotFoundError: If ``sources_dir`` is not an existing directory.
    """
    if not os.path.isdir(sources_dir):
        logger.error(f"Source directory not found: '{sources_dir}'")
        raise FileNotFoundError(f"Source directory not found: '{sources_dir}'")

    logger.info(f"Starting chunking process. Sources: '{sources_dir}', Output: '{output_file}'")

    if text_output_dir:
        os.makedirs(text_output_dir, exist_ok=True)
        logger.info(f"Will save raw extracted text to: '{text_output_dir}'")

    all_chunks_for_json: List[Dict] = []
    processed_files_count = 0

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    # Sorted so the chunk order in the output is reproducible; os.listdir
    # returns entries in arbitrary order.
    for filename in sorted(os.listdir(sources_dir)):
        file_path = os.path.join(sources_dir, filename)
        if not os.path.isfile(file_path):
            continue

        # splitext yields '' for extension-less names, so a file literally
        # named "csv" or "txt" is not mistaken for a supported type.
        file_ext = os.path.splitext(filename)[1].lstrip('.').lower()
        if file_ext not in FAISS_RAG_SUPPORTED_EXTENSIONS:
            logger.debug(f"Skipping unsupported file: {filename}")
            continue

        logger.info(f"Processing source file: {filename}")

        # CSV Handling natively row by row
        if file_ext == 'csv':
            try:
                # newline='' is what the csv module expects so that newlines
                # embedded in quoted fields are parsed correctly.
                with open(file_path, mode='r', encoding='utf-8-sig', newline='') as f:
                    reader = csv.DictReader(f)
                    for i, row in enumerate(reader):
                        row_text = "\n".join(
                            f"{k}: {v}" for k, v in row.items() if k and v and str(v).strip()
                        )
                        if not row_text:
                            # A row with no usable values would yield an empty chunk.
                            continue
                        all_chunks_for_json.append({
                            "page_content": row_text,
                            "metadata": {
                                "source_document_name": filename,
                                "chunk_index": i,
                                "full_location": f"{filename}, Row {i+1}",
                                "source_type": "csv"
                            }
                        })
                processed_files_count += 1
            except Exception as e:
                # Best-effort: a bad CSV is logged and skipped, not fatal.
                logger.error(f"Error processing CSV {filename}: {e}")
            continue

        text_content = FAISS_RAG_SUPPORTED_EXTENSIONS[file_ext](file_path)
        if not text_content or text_content == "CSV_HANDLED_NATIVELY":
            continue

        if text_output_dir:
            try:
                text_output_path = os.path.join(text_output_dir, f"{filename}.txt")
                with open(text_output_path, 'w', encoding='utf-8') as f_text:
                    f_text.write(text_content)
            except Exception as e_text_save:
                # Saving the raw text is optional; chunking still proceeds.
                logger.error(f"Could not save extracted text for '{filename}': {e_text_save}")

        for i, chunk_text in enumerate(text_splitter.split_text(text_content)):
            all_chunks_for_json.append({
                "page_content": chunk_text,
                "metadata": {
                    "source_document_name": filename,
                    "chunk_index": i,
                    "full_location": f"{filename}, Chunk {i+1}"
                }
            })

        processed_files_count += 1

    if not all_chunks_for_json:
        logger.warning(f"No processable documents found in '{sources_dir}'.")

    # dirname is '' when output_file is a bare filename; os.makedirs('') raises.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_chunks_for_json, f, indent=2)

    logger.info(f"Chunking complete. Processed {processed_files_count} files. Total chunks: {len(all_chunks_for_json)}")
|
| 112 |
+
|
| 113 |
+
def main():
    """Command-line entry point: parse arguments and run the chunking process.

    Exits with status 1 (after logging the traceback) if chunking raises.
    """
    import sys  # function-scope import keeps this block self-contained

    parser = argparse.ArgumentParser()
    parser.add_argument('--sources-dir', type=str, required=True)
    parser.add_argument('--output-file', type=str, required=True)
    parser.add_argument('--text-output-dir', type=str, default=None)
    parser.add_argument('--chunk-size', type=int, default=1000)
    parser.add_argument('--chunk-overlap', type=int, default=150)
    args = parser.parse_args()

    try:
        process_sources_and_create_chunks(
            sources_dir=args.sources_dir,
            output_file=args.output_file,
            chunk_size=args.chunk_size,
            chunk_overlap=args.chunk_overlap,
            text_output_dir=args.text_output_dir
        )
    except Exception as e:
        logger.critical(f"Chunking failed: {e}", exc_info=True)
        # sys.exit rather than the bare exit() helper, which is injected by
        # the `site` module and is not guaranteed to exist (e.g. under -S).
        sys.exit(1)


if __name__ == "__main__":
    main()
|
config.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
# --- Logging Setup ---
logger = logging.getLogger(__name__)
if not logger.handlers:
    # basicConfig only installs a handler when the root logger has none, so
    # importing this module never overrides logging configured by the host app.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

_MODULE_BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def _env_bool(name, default=False):
    """Return True only when the variable (or its default) spells "true".

    The comparison is case-insensitive and ignores surrounding whitespace;
    every other value, including "1" or "yes", is treated as False.
    """
    return os.getenv(name, str(default)).strip().lower() == "true"


def _env_number(cast, default, names):
    """Parse the first non-blank variable in ``names`` with ``cast``.

    Unset or blank variables are skipped; when none is usable ``default`` is
    returned. A value that cannot be parsed raises ValueError naming the
    offending variable, so a bad setting is easy to locate at startup.
    """
    for name in names:
        raw = os.getenv(name)
        if raw is None or not raw.strip():
            continue
        try:
            return cast(raw.strip())
        except ValueError as exc:
            raise ValueError(
                f"Environment variable {name} must be a valid {cast.__name__}, got {raw!r}"
            ) from exc
    return default


def _env_int(name, default, *legacy_names):
    """Integer setting read from ``name``, then from any legacy names, else ``default``."""
    return _env_number(int, default, (name,) + legacy_names)


def _env_float(name, default, *legacy_names):
    """Float setting read from ``name``, then from any legacy names, else ``default``."""
    return _env_number(float, default, (name,) + legacy_names)


# API Authentication for n8n (Basic Auth)
API_USERNAME = os.getenv("API_USERNAME", "admin")
API_PASSWORD = os.getenv("API_PASSWORD", "password")

# Admin fallback credentials for dashboard (used when users.csv is missing or unavailable)
ADMIN_USERNAME = os.getenv('FLASK_ADMIN_USERNAME', 'admin')
ADMIN_PASSWORD = os.getenv('FLASK_ADMIN_PASSWORD', '1234')

# The built-in password defaults are publicly known; make it visible in the
# logs whenever a deployment is relying on them.
for _password_var in ("API_PASSWORD", "FLASK_ADMIN_PASSWORD"):
    if os.getenv(_password_var) is None:
        logger.warning(
            "%s is not set; falling back to an insecure built-in default password.",
            _password_var,
        )

# URL Fetcher configs (Generalized from Rentry)
# The RENTRY_* names are the older spellings and are honoured as fallbacks.
URL_FETCH_ENABLED = _env_bool("URL_FETCH_ENABLED")
EXTERNAL_URL = os.getenv("EXTERNAL_URL", os.getenv("RENTRY_URL", ""))
URL_UPDATE_PERIOD_MINUTES = _env_int("URL_UPDATE_PERIOD_MINUTES", 60, "RENTRY_UPDATE_PERIOD_MINUTES")

# CSV Configuration
RAG_CSV_MAX_RESULTS = _env_int("RAG_CSV_MAX_RESULTS", 5)
RAG_CSV_CONFIDENCE_THRESHOLD = _env_float("RAG_CSV_CONFIDENCE_THRESHOLD", 0.5)

RAG_FAISS_INDEX_SUBDIR_NAME = "faiss_index"
RAG_STORAGE_PARENT_DIR = os.getenv("RAG_STORAGE_DIR", os.path.join(_MODULE_BASE_DIR, "faiss_storage"))
RAG_SOURCES_DIR = os.getenv("SOURCES_DIR", os.path.join(_MODULE_BASE_DIR, "sources"))
RAG_CHUNKED_SOURCES_FILENAME = "pre_chunked_sources.json"

# Both directories are created at import time so later code can assume they exist.
os.makedirs(RAG_SOURCES_DIR, exist_ok=True)
os.makedirs(RAG_STORAGE_PARENT_DIR, exist_ok=True)

# Embedding and model configuration
RAG_EMBEDDING_MODEL_NAME = os.getenv("RAG_EMBEDDING_MODEL", "BAAI/bge-small-en")
RAG_EMBEDDING_USE_GPU = _env_bool("RAG_EMBEDDING_GPU")
RAG_LOAD_INDEX_ON_STARTUP = _env_bool("RAG_LOAD_INDEX", True)

# Retrieval Settings
RAG_INITIAL_FETCH_K = _env_int("RAG_INITIAL_FETCH_K", 20)
RAG_RERANKER_K = _env_int("RAG_RERANKER_K", 5)
RAG_MAX_FILES_FOR_INCREMENTAL = _env_int("RAG_MAX_FILES_FOR_INCREMENTAL", 50)

# Chunk configuration
RAG_CHUNK_SIZE = _env_int("RAG_CHUNK_SIZE", 2000)
RAG_CHUNK_OVERLAP = _env_int("RAG_CHUNK_OVERLAP", 150)

# Reranker configuration
RAG_RERANKER_MODEL_NAME = os.getenv("RAG_RERANKER_MODEL", "jinaai/jina-reranker-v2-base-multilingual")
RAG_RERANKER_ENABLED = _env_bool("RAG_RERANKER_ENABLED", True)

# GDrive configuration for RAG sources
GDRIVE_SOURCES_ENABLED = _env_bool("GDRIVE_SOURCES_ENABLED")
GDRIVE_FOLDER_ID_OR_URL = os.getenv("GDRIVE_FOLDER_URL")

# GDrive configuration for downloading a pre-built FAISS index
GDRIVE_INDEX_ENABLED = _env_bool("GDRIVE_INDEX_ENABLED")
GDRIVE_INDEX_ID_OR_URL = os.getenv("GDRIVE_INDEX_URL")

# GDrive configuration for downloading users.csv
GDRIVE_USERS_CSV_ENABLED = _env_bool("GDRIVE_USERS_CSV_ENABLED")
GDRIVE_USERS_CSV_ID_OR_URL = os.getenv("GDRIVE_USERS_CSV_URL")

RAG_DETAILED_LOGGING = _env_bool("RAG_DETAILED_LOGGING", True)

logger.info(f"RAG Config Loaded - Chunk Size: {RAG_CHUNK_SIZE}, Chunk Overlap: {RAG_CHUNK_OVERLAP}")
logger.info(f"Embedding Model: {RAG_EMBEDDING_MODEL_NAME}")
logger.info(f"Reranker Model: {RAG_RERANKER_MODEL_NAME}")
logger.info(f"Retrieval Pipeline: Initial Fetch K={RAG_INITIAL_FETCH_K}, Reranker Final K={RAG_RERANKER_K}")
logger.info(f"CSV Filters: Max Results={RAG_CSV_MAX_RESULTS}, Threshold={RAG_CSV_CONFIDENCE_THRESHOLD}")
logger.info(f"URL Fetching: {'ENABLED' if URL_FETCH_ENABLED else 'DISABLED'}")
logger.info(f"Detailed Logging: {'ENABLED' if RAG_DETAILED_LOGGING else 'DISABLED'}")
logger.info(f"GDrive Sources Download: {'ENABLED' if GDRIVE_SOURCES_ENABLED else 'DISABLED'}")
logger.info(f"GDrive Pre-built Index Download: {'ENABLED' if GDRIVE_INDEX_ENABLED else 'DISABLED'}")
logger.info(f"GDrive users.csv Download: {'ENABLED' if GDRIVE_USERS_CSV_ENABLED else 'DISABLED'}")
|
note.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
https://aibot8800-rag-brain.hf.space
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pip install "huggingface-hub>=0.34.0,<1.0"
|
| 6 |
+
pip install -U transformers sentence-transformers
|
| 7 |
+
pip install langchain-huggingface
|
postman.json
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"info": {
|
| 3 |
+
"_postman_id": "b8f9e9a1-5c8e-4a8e-9b8e-1f8e9a1f8e9a",
|
| 4 |
+
"name": "edmond_cad_refund",
|
| 5 |
+
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
|
| 6 |
+
},
|
| 7 |
+
"item": [
|
| 8 |
+
{
|
| 9 |
+
"name": "N8N - Search Knowledgebase",
|
| 10 |
+
"request": {
|
| 11 |
+
"auth": {
|
| 12 |
+
"type": "basic",
|
| 13 |
+
"basic": [
|
| 14 |
+
{
|
| 15 |
+
"key": "password",
|
| 16 |
+
"value": "password",
|
| 17 |
+
"type": "string"
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"key": "username",
|
| 21 |
+
"value": "admin",
|
| 22 |
+
"type": "string"
|
| 23 |
+
}
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
"method": "POST",
|
| 27 |
+
"header": [],
|
| 28 |
+
"body": {
|
| 29 |
+
"mode": "raw",
|
| 30 |
+
"raw": "{\n \"query\": \"how to get a refund for electronics?\",\n \"use_reranker\": true,\n \"final_k\": 5,\n \"persona\": [\"standard\"],\n \"tier\": [\"gold\"]\n}",
|
| 31 |
+
"options": {
|
| 32 |
+
"raw": {
|
| 33 |
+
"language": "json"
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"url": {
|
| 38 |
+
"raw": "{{base_url}}/webhook/search",
|
| 39 |
+
"host": [
|
| 40 |
+
"{{base_url}}"
|
| 41 |
+
],
|
| 42 |
+
"path": [
|
| 43 |
+
"webhook",
|
| 44 |
+
"search"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
"description": "Main endpoint used by N8N to retrieve context chunks."
|
| 48 |
+
},
|
| 49 |
+
"response": []
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"name": "Admin - Rebuild Index",
|
| 53 |
+
"request": {
|
| 54 |
+
"auth": {
|
| 55 |
+
"type": "basic",
|
| 56 |
+
"basic": [
|
| 57 |
+
{
|
| 58 |
+
"key": "password",
|
| 59 |
+
"value": "1234",
|
| 60 |
+
"type": "string"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"key": "username",
|
| 64 |
+
"value": "admin",
|
| 65 |
+
"type": "string"
|
| 66 |
+
}
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
"method": "POST",
|
| 70 |
+
"header": [],
|
| 71 |
+
"url": {
|
| 72 |
+
"raw": "{{base_url}}/admin/rebuild_index",
|
| 73 |
+
"host": [
|
| 74 |
+
"{{base_url}}"
|
| 75 |
+
],
|
| 76 |
+
"path": [
|
| 77 |
+
"admin",
|
| 78 |
+
"rebuild_index"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
"description": "Completely deletes and rebuilds the FAISS index from sources."
|
| 82 |
+
},
|
| 83 |
+
"response": []
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"name": "Admin - Update Index (Incremental)",
|
| 87 |
+
"request": {
|
| 88 |
+
"auth": {
|
| 89 |
+
"type": "basic",
|
| 90 |
+
"basic": [
|
| 91 |
+
{
|
| 92 |
+
"key": "password",
|
| 93 |
+
"value": "1234",
|
| 94 |
+
"type": "string"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"key": "username",
|
| 98 |
+
"value": "admin",
|
| 99 |
+
"type": "string"
|
| 100 |
+
}
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
"method": "POST",
|
| 104 |
+
"header": [],
|
| 105 |
+
"body": {
|
| 106 |
+
"mode": "raw",
|
| 107 |
+
"raw": "{\n \"max_new_files\": 50\n}",
|
| 108 |
+
"options": {
|
| 109 |
+
"raw": {
|
| 110 |
+
"language": "json"
|
| 111 |
+
}
|
| 112 |
+
}
|
| 113 |
+
},
|
| 114 |
+
"url": {
|
| 115 |
+
"raw": "{{base_url}}/admin/update_faiss_index",
|
| 116 |
+
"host": [
|
| 117 |
+
"{{base_url}}"
|
| 118 |
+
],
|
| 119 |
+
"path": [
|
| 120 |
+
"admin",
|
| 121 |
+
"update_faiss_index"
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
"description": "Adds only new files to the existing index."
|
| 125 |
+
},
|
| 126 |
+
"response": []
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"name": "Public - Status",
|
| 130 |
+
"request": {
|
| 131 |
+
"method": "GET",
|
| 132 |
+
"header": [],
|
| 133 |
+
"url": {
|
| 134 |
+
"raw": "{{base_url}}/status",
|
| 135 |
+
"host": [
|
| 136 |
+
"{{base_url}}"
|
| 137 |
+
],
|
| 138 |
+
"path": [
|
| 139 |
+
"status"
|
| 140 |
+
]
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"response": []
|
| 144 |
+
}
|
| 145 |
+
],
|
| 146 |
+
"variable": [
|
| 147 |
+
{
|
| 148 |
+
"key": "base_url",
|
| 149 |
+
"value": "http://localhost:5000",
|
| 150 |
+
"type": "string"
|
| 151 |
+
}
|
| 152 |
+
]
|
| 153 |
+
}
|
rag_components.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
import csv
|
| 6 |
+
from typing import List, Dict, Optional, Any
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from sentence_transformers import CrossEncoder
|
| 10 |
+
|
| 11 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 12 |
+
from langchain_community.vectorstores import FAISS
|
| 13 |
+
|
| 14 |
+
from langchain_core.documents import Document
|
| 15 |
+
from langchain_core.retrievers import BaseRetriever
|
| 16 |
+
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
| 17 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 18 |
+
|
| 19 |
+
from config import (
|
| 20 |
+
RAG_RERANKER_MODEL_NAME, RAG_DETAILED_LOGGING,
|
| 21 |
+
RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP, RAG_CHUNKED_SOURCES_FILENAME,
|
| 22 |
+
RAG_FAISS_INDEX_SUBDIR_NAME, RAG_INITIAL_FETCH_K, RAG_RERANKER_K,
|
| 23 |
+
RAG_MAX_FILES_FOR_INCREMENTAL
|
| 24 |
+
)
|
| 25 |
+
from utils import FAISS_RAG_SUPPORTED_EXTENSIONS
|
| 26 |
+
|
| 27 |
+
logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class DocumentReranker:
    """Cross-encoder wrapper that reorders retrieved documents by relevance to a query."""

    def __init__(self, model_name: str = RAG_RERANKER_MODEL_NAME):
        """Load the cross-encoder identified by ``model_name``.

        Raises:
            RuntimeError: when the model cannot be loaded; the original
                exception is attached as the cause.
        """
        self.logger = logging.getLogger(__name__ + ".DocumentReranker")
        self.model_name = model_name
        self.model = None

        try:
            self.logger.info(f"[RERANKER_INIT] Loading reranker model: {self.model_name}")
            began = time.time()
            # NOTE(review): trust_remote_code=True allows code shipped with the
            # model repository to run while loading; only use trusted models.
            self.model = CrossEncoder(model_name, trust_remote_code=True)
            elapsed = time.time() - began
            self.logger.info(f"[RERANKER_INIT] Reranker model '{self.model_name}' loaded successfully in {elapsed:.2f}s")
        except Exception as e:
            self.logger.error(f"[RERANKER_INIT] Failed to load reranker model '{self.model_name}': {e}", exc_info=True)
            raise RuntimeError(f"Could not initialize reranker model: {e}") from e

    def rerank_documents(self, query: str, documents: List[Document], top_k: int) -> List[Document]:
        """Return the ``top_k`` documents with the highest cross-encoder score.

        Each returned document gets its score stored under
        ``metadata["reranker_score"]``. With no documents an empty list is
        returned; with no model loaded, or when scoring fails, the first
        ``top_k`` documents are returned in their incoming order.
        """
        if not documents:
            return []
        if not self.model:
            return documents[:top_k]

        try:
            began = time.time()
            pairs = [[query, candidate.page_content] for candidate in documents]
            scores = self.model.predict(pairs)
            elapsed = time.time() - began
            self.logger.info(f"[RERANKER] Computed relevance scores in {elapsed:.3f}s")

            # Highest score first; ties keep their retrieval order (stable sort).
            ranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)

            best = []
            for candidate, relevance in ranked[:top_k]:
                candidate.metadata["reranker_score"] = float(relevance)
                best.append(candidate)
            return best
        except Exception as e:
            self.logger.error(f"[RERANKER] Error during reranking: {e}", exc_info=True)
            return documents[:top_k]
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class FAISSRetrieverWithScore(BaseRetriever):
    """Retriever that fetches candidates from FAISS and optionally reranks them.

    With a reranker, ``initial_fetch_k`` candidates are fetched and reduced to
    ``final_k``; without one, ``final_k`` candidates are fetched directly.
    """

    # Vector store queried for candidate documents.
    vectorstore: FAISS
    # Optional second-stage reranker; None disables reranking.
    reranker: Optional[DocumentReranker] = None
    # Number of candidates fetched when a reranker is present.
    initial_fetch_k: int = RAG_INITIAL_FETCH_K
    # Number of documents returned to the caller.
    final_k: int = RAG_RERANKER_K

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return documents for ``query`` annotated with ``retrieval_score``.

        When a reranker is configured, the returned documents additionally
        carry the ``reranker_score`` it assigns.
        """
        start_time = time.time()
        num_to_fetch = self.initial_fetch_k if self.reranker else self.final_k

        logger.info(f"[RETRIEVER] Fetching {num_to_fetch} docs (Rerank={self.reranker is not None})")

        docs_and_scores = self.vectorstore.similarity_search_with_score(query, k=num_to_fetch)

        relevant_docs = []
        for doc, score in docs_and_scores:
            # Annotate a copy rather than the object handed back by the store:
            # the store may return the very objects it keeps internally, and
            # writing per-query scores into them would leak into later queries
            # and into the index the next time it is saved.
            metadata = dict(doc.metadata)
            # Drop any score left behind by an earlier query so only the
            # reranker run below can supply one.
            metadata.pop("reranker_score", None)
            metadata["retrieval_score"] = float(score)
            relevant_docs.append(Document(page_content=doc.page_content, metadata=metadata))

        if self.reranker and relevant_docs:
            relevant_docs = self.reranker.rerank_documents(query, relevant_docs, top_k=self.final_k)

        total_time = time.time() - start_time
        logger.info(f"[RETRIEVER] Completed in {total_time:.3f}s. Returned {len(relevant_docs)} docs.")
        return relevant_docs
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class KnowledgeRAG:
    """FAISS-backed knowledge base: builds, loads, updates and searches an index.

    The index lives in ``<index_storage_dir>/<RAG_FAISS_INDEX_SUBDIR_NAME>``
    next to two small JSON files: ``chunk_config.json`` (chunking parameters
    used for the build) and ``processed_files.json`` (names of the source
    files already indexed).
    """

    def __init__(
        self,
        index_storage_dir: str,
        embedding_model_name: str,
        use_gpu_for_embeddings: bool,
        chunk_size: int = RAG_CHUNK_SIZE,
        chunk_overlap: int = RAG_CHUNK_OVERLAP,
        reranker_model_name: Optional[str] = None,
        enable_reranker: bool = True,
    ):
        """Create the embedding model and, if enabled, the reranker.

        Args:
            index_storage_dir: Parent directory for the index; created if missing.
            embedding_model_name: Model name passed to HuggingFaceEmbeddings.
            use_gpu_for_embeddings: Use CUDA when available, otherwise CPU.
            chunk_size: Chunk size for the text splitter.
            chunk_overlap: Overlap between consecutive chunks.
            reranker_model_name: Reranker model; defaults to RAG_RERANKER_MODEL_NAME.
            enable_reranker: When False no reranker is loaded.

        A reranker that fails to load is logged and disabled rather than
        aborting initialisation. No index is loaded or built here.
        """
        self.logger = logging.getLogger(__name__ + ".KnowledgeRAG")
        self.logger.info(f"[RAG_INIT] Initializing KnowledgeRAG system")

        self.index_storage_dir = index_storage_dir
        os.makedirs(self.index_storage_dir, exist_ok=True)

        self.embedding_model_name = embedding_model_name
        self.use_gpu_for_embeddings = use_gpu_for_embeddings
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.reranker_model_name = reranker_model_name or RAG_RERANKER_MODEL_NAME
        self.enable_reranker = enable_reranker
        self.reranker = None

        device = "cpu"
        if self.use_gpu_for_embeddings:
            if torch.cuda.is_available():
                self.logger.info(f"[RAG_INIT] CUDA available. Requesting GPU.")
                device = "cuda"
            else:
                self.logger.warning("[RAG_INIT] CUDA not available. Fallback to CPU.")

        self.embeddings = HuggingFaceEmbeddings(
            model_name=self.embedding_model_name,
            model_kwargs={"device": device},
            encode_kwargs={"normalize_embeddings": True}
        )

        if self.enable_reranker:
            try:
                self.reranker = DocumentReranker(self.reranker_model_name)
            except Exception as e:
                self.logger.warning(f"[RAG_INIT] Reranker Init Failed: {e}")
                self.reranker = None

        self.vector_store: Optional[FAISS] = None
        self.retriever: Optional[FAISSRetrieverWithScore] = None
        self.processed_source_files: List[str] = []

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _faiss_path(self) -> str:
        """Return the directory that holds the FAISS index and its JSON sidecars."""
        return os.path.join(self.index_storage_dir, RAG_FAISS_INDEX_SUBDIR_NAME)

    def _build_retriever(self) -> FAISSRetrieverWithScore:
        """Create a retriever over the current vector store with the configured K values."""
        return FAISSRetrieverWithScore(
            vectorstore=self.vector_store,
            reranker=self.reranker,
            initial_fetch_k=RAG_INITIAL_FETCH_K,
            final_k=RAG_RERANKER_K
        )

    def _save_processed_files(self):
        """Persist the sorted list of indexed source files next to the index."""
        with open(os.path.join(self._faiss_path(), "processed_files.json"), 'w') as f:
            json.dump(sorted(self.processed_source_files), f)

    def _documents_from_file(self, file_path: str, filename: str, text_splitter, log_tag: str) -> Optional[List[Document]]:
        """Turn one source file into documents.

        CSV files become one document per non-empty row ("column: value"
        lines); every other supported type is converted to text by its
        registered extractor and split into chunks.

        Returns:
            The documents (possibly an empty list for a CSV without usable
            rows), or None when the CSV could not be read or the extractor
            produced no text. A CSV that fails part-way contributes nothing,
            so an index never holds a partially read file.
        """
        file_ext = filename.split('.')[-1].lower()

        if file_ext == 'csv':
            try:
                documents = []
                # newline='' lets the csv module handle line endings inside quoted fields.
                with open(file_path, mode='r', encoding='utf-8-sig', newline='') as f:
                    for i, row in enumerate(csv.DictReader(f)):
                        row_text = "\n".join(f"{k}: {v}" for k, v in row.items() if k and v and str(v).strip())
                        if not row_text:
                            continue  # a row with no values has nothing worth embedding
                        meta = {"source_document_name": filename, "chunk_index": i, "source_type": "csv"}
                        documents.append(Document(page_content=row_text, metadata=meta))
                return documents
            except Exception as e:
                self.logger.error(f"[{log_tag}] Error processing CSV {filename}: {e}")
                return None

        text_content = FAISS_RAG_SUPPORTED_EXTENSIONS[file_ext](file_path)
        if not text_content or text_content == "CSV_HANDLED_NATIVELY":
            return None
        return [
            Document(page_content=chunk_text, metadata={"source_document_name": filename, "chunk_index": i})
            for i, chunk_text in enumerate(text_splitter.split_text(text_content))
        ]

    def _save_chunk_config(self):
        """Record the chunking parameters used for the index in chunk_config.json."""
        config_file = os.path.join(self._faiss_path(), "chunk_config.json")
        with open(config_file, 'w') as f:
            json.dump({"chunk_size": self.chunk_size, "chunk_overlap": self.chunk_overlap}, f)

    def _load_chunk_config(self) -> Optional[dict]:
        """Return the saved chunking parameters, or None when none were saved."""
        config_file = os.path.join(self._faiss_path(), "chunk_config.json")
        if os.path.exists(config_file):
            with open(config_file, 'r') as f:
                return json.load(f)
        return None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def chunk_config_has_changed(self) -> bool:
        """Report whether the saved chunking parameters differ from the current ones.

        Returns False when no parameters were saved; logs a warning when a
        mismatch is found.
        """
        saved = self._load_chunk_config()
        if saved is None:
            return False
        changed = saved.get("chunk_size") != self.chunk_size or saved.get("chunk_overlap") != self.chunk_overlap
        if changed:
            self.logger.warning(
                f"[CONFIG_CHANGE] Chunk config mismatch! "
                f"Saved=(size={saved.get('chunk_size')}, overlap={saved.get('chunk_overlap')}) "
                f"Current=(size={self.chunk_size}, overlap={self.chunk_overlap}). "
                f"Index will be rebuilt."
            )
        return changed

    def build_index_from_source_files(self, source_folder_path: str):
        """Build a fresh index and save it, together with its sidecar files.

        Pre-chunked documents from ``RAG_CHUNKED_SOURCES_FILENAME`` in the
        storage directory are used when that file loads completely; otherwise
        every supported file in ``source_folder_path`` is processed.

        Raises:
            FileNotFoundError: ``source_folder_path`` is not a directory.
            ValueError: no documents could be produced.
        """
        self.logger.info(f"[INDEX_BUILD] Building from: {source_folder_path}")
        if not os.path.isdir(source_folder_path):
            raise FileNotFoundError(f"Source folder not found: '{source_folder_path}'.")

        all_docs = []
        processed_files = []
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)

        pre_chunked_path = os.path.join(self.index_storage_dir, RAG_CHUNKED_SOURCES_FILENAME)
        if os.path.exists(pre_chunked_path):
            try:
                with open(pre_chunked_path, 'r', encoding='utf-8') as f:
                    chunk_data_list = json.load(f)
                # Collect into temporaries so a failure half-way through does not
                # leave a partial document list that would suppress the folder scan.
                loaded_docs = []
                loaded_names = set()
                for chunk in chunk_data_list:
                    doc = Document(page_content=chunk.get("page_content", ""), metadata=chunk.get("metadata", {}))
                    loaded_docs.append(doc)
                    if 'source_document_name' in doc.metadata:
                        loaded_names.add(doc.metadata['source_document_name'])
                all_docs = loaded_docs
                processed_files = sorted(loaded_names)
            except Exception as e:
                self.logger.error(f"[INDEX_BUILD] JSON load failed: {e}")

        if not all_docs:
            for filename in sorted(os.listdir(source_folder_path)):
                file_path = os.path.join(source_folder_path, filename)
                if not os.path.isfile(file_path):
                    continue
                if filename.split('.')[-1].lower() not in FAISS_RAG_SUPPORTED_EXTENSIONS:
                    continue
                documents = self._documents_from_file(file_path, filename, text_splitter, "INDEX_BUILD")
                if documents is None:
                    continue
                all_docs.extend(documents)
                processed_files.append(filename)

        if not all_docs:
            raise ValueError("No documents to index.")

        self.processed_source_files = processed_files
        self.logger.info(f"[INDEX_BUILD] Creating FAISS index with {len(all_docs)} chunks.")

        self.vector_store = FAISS.from_documents(all_docs, self.embeddings)
        self.vector_store.save_local(self._faiss_path())
        self._save_chunk_config()
        # Without this list a later load cannot tell which files are already
        # indexed, and an incremental update would index every file again.
        self._save_processed_files()

        self.retriever = self._build_retriever()

    def load_index_from_disk(self):
        """Load a previously saved index and the list of files it contains.

        Raises:
            FileNotFoundError: the index directory does not exist.
        """
        faiss_path = self._faiss_path()
        if not os.path.exists(faiss_path):
            raise FileNotFoundError("Index not found.")

        # NOTE(review): allow_dangerous_deserialization=True means loading the
        # index unpickles data from disk. Only load index directories from a
        # trusted origin; a tampered index can execute arbitrary code.
        self.vector_store = FAISS.load_local(
            folder_path=faiss_path,
            embeddings=self.embeddings,
            allow_dangerous_deserialization=True
        )
        self.retriever = self._build_retriever()

        meta_file = os.path.join(faiss_path, "processed_files.json")
        if os.path.exists(meta_file):
            with open(meta_file, 'r') as f:
                self.processed_source_files = json.load(f)
        else:
            self.processed_source_files = ["Loaded from disk (unknown sources)"]

        self.logger.info("[INDEX_LOAD] Success.")

    def update_index_with_new_files(self, source_folder_path: str, max_files_to_process: Optional[int] = None) -> Dict[str, Any]:
        """Add source files that are not yet indexed to the loaded index.

        Args:
            source_folder_path: Folder scanned for supported files.
            max_files_to_process: Upper bound on files handled in this call;
                defaults to RAG_MAX_FILES_FOR_INCREMENTAL.

        Returns:
            A dict with ``status``, ``message`` and ``files_added``; on a
            successful addition also ``remaining`` (new files left for a later call).

        Raises:
            RuntimeError: no index is loaded.
        """
        self.logger.info(f"[INDEX_UPDATE] Checking for new files in: {source_folder_path}")

        if not self.vector_store:
            raise RuntimeError("Cannot update: no index loaded.")

        processed_set = set(self.processed_source_files)
        all_new_files = [
            filename
            for filename in sorted(os.listdir(source_folder_path))
            if filename not in processed_set
            # Skip directories, matching what the full build does.
            and os.path.isfile(os.path.join(source_folder_path, filename))
            and filename.split('.')[-1].lower() in FAISS_RAG_SUPPORTED_EXTENSIONS
        ]

        if not all_new_files:
            return {"status": "success", "message": "No new files found.", "files_added": []}

        limit = max_files_to_process if max_files_to_process is not None else RAG_MAX_FILES_FOR_INCREMENTAL
        files_to_process = all_new_files[:limit]

        new_docs = []
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)

        for filename in files_to_process:
            file_path = os.path.join(source_folder_path, filename)
            documents = self._documents_from_file(file_path, filename, text_splitter, "INDEX_UPDATE")
            if documents:
                new_docs.extend(documents)

        if not new_docs:
            return {"status": "warning", "message": "New files found but no text extracted.", "files_added": []}

        self.vector_store.add_documents(new_docs)
        self.vector_store.save_local(self._faiss_path())

        self.processed_source_files.extend(files_to_process)
        self._save_processed_files()

        return {
            "status": "success",
            "message": f"Added {len(files_to_process)} files.",
            "files_added": files_to_process,
            "remaining": len(all_new_files) - len(files_to_process)
        }

    def search_knowledge_base(self, query: str, top_k: Optional[int] = None) -> List[Dict[str, Any]]:
        """Search the index and return the matches as plain dictionaries.

        Args:
            query: Search text.
            top_k: Number of results for this call; falsy values keep the
                retriever's configured ``final_k``.

        Returns:
            One dict per match with ``content``, ``metadata`` and ``score``
            (the reranker score when present, otherwise the retrieval score).

        Raises:
            RuntimeError: no retriever is available (index not built or loaded).
        """
        if not self.retriever:
            raise RuntimeError("Retriever not initialized.")

        retriever = self.retriever
        if top_k:
            # Use a per-call retriever instead of temporarily changing the
            # shared one, so simultaneous searches cannot see each other's K.
            retriever = FAISSRetrieverWithScore(
                vectorstore=self.retriever.vectorstore,
                reranker=self.retriever.reranker,
                initial_fetch_k=self.retriever.initial_fetch_k,
                final_k=top_k
            )

        results = []
        for doc in retriever.invoke(query):
            # Explicit None check: a reranker score of exactly 0.0 is a valid
            # score and must not fall through to the retrieval score.
            score = doc.metadata.get("reranker_score")
            if score is None:
                score = doc.metadata.get("retrieval_score")
            results.append({
                "content": doc.page_content,
                "metadata": doc.metadata,
                "score": score
            })
        return results
|
rag_system.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
import shutil
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
from rag_components import KnowledgeRAG
|
| 7 |
+
from utils import download_and_unzip_gdrive_folder
|
| 8 |
+
from config import (
|
| 9 |
+
GDRIVE_SOURCES_ENABLED, GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR,
|
| 10 |
+
RAG_STORAGE_PARENT_DIR, RAG_FAISS_INDEX_SUBDIR_NAME, RAG_LOAD_INDEX_ON_STARTUP,
|
| 11 |
+
RAG_EMBEDDING_MODEL_NAME, RAG_EMBEDDING_USE_GPU,
|
| 12 |
+
RAG_CHUNK_SIZE, RAG_CHUNK_OVERLAP,
|
| 13 |
+
RAG_RERANKER_MODEL_NAME, RAG_RERANKER_ENABLED
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
def initialize_and_get_rag_system(force_rebuild: bool = False, source_dir_override: Optional[str] = None, storage_dir_override: Optional[str] = None) -> Optional[KnowledgeRAG]:
    """Create a ``KnowledgeRAG`` instance and load or build its index.

    Steps, in order:
      1. Resolve the source and storage directories (overrides win over the
         configured ``RAG_SOURCES_DIR`` / ``RAG_STORAGE_PARENT_DIR``).
      2. When GDrive sourcing is enabled, a folder is configured and no source
         override was given, wipe ``RAG_SOURCES_DIR`` and re-download into it.
      3. When ``force_rebuild`` is set, delete the existing index directory.
      4. Construct ``KnowledgeRAG``; try to load the index from disk when
         allowed, otherwise build it from the source directory if it has files.

    Args:
        force_rebuild: Delete any stored index and skip loading from disk.
        source_dir_override: Directory to read sources from instead of the
            configured one; also disables the GDrive download.
        storage_dir_override: Parent directory for the index instead of the
            configured one.

    Returns:
        The ``KnowledgeRAG`` instance, or ``None`` when construction, loading
        or building raised (the error is logged at critical level).
    """
    logger.info("[RAG_SYSTEM_INIT] Initializing...")

    source_dir_to_use = source_dir_override or RAG_SOURCES_DIR
    storage_dir_to_use = storage_dir_override or RAG_STORAGE_PARENT_DIR

    # GDrive Logic: only when enabled, configured, and no explicit override.
    wants_gdrive = GDRIVE_SOURCES_ENABLED and not source_dir_override and GDRIVE_FOLDER_ID_OR_URL
    if wants_gdrive:
        logger.info("[RAG_SYSTEM_INIT] Downloading sources from GDrive...")
        if os.path.exists(RAG_SOURCES_DIR):
            shutil.rmtree(RAG_SOURCES_DIR)
        download_and_unzip_gdrive_folder(GDRIVE_FOLDER_ID_OR_URL, RAG_SOURCES_DIR)

    faiss_index_path = os.path.join(storage_dir_to_use, RAG_FAISS_INDEX_SUBDIR_NAME)
    if force_rebuild and os.path.exists(faiss_index_path):
        logger.info("[RAG_SYSTEM_INIT] Force rebuild: deleting old index.")
        shutil.rmtree(faiss_index_path)

    try:
        rag = KnowledgeRAG(
            index_storage_dir=storage_dir_to_use,
            embedding_model_name=RAG_EMBEDDING_MODEL_NAME,
            use_gpu_for_embeddings=RAG_EMBEDDING_USE_GPU,
            chunk_size=RAG_CHUNK_SIZE,
            chunk_overlap=RAG_CHUNK_OVERLAP,
            reranker_model_name=RAG_RERANKER_MODEL_NAME,
            enable_reranker=RAG_RERANKER_ENABLED,
        )

        index_ready = False
        may_load = RAG_LOAD_INDEX_ON_STARTUP and not force_rebuild
        if may_load:
            if rag.chunk_config_has_changed():
                logger.warning("[RAG_SYSTEM_INIT] Chunk config changed — forcing index rebuild.")
            else:
                try:
                    rag.load_index_from_disk()
                except Exception as e:
                    logger.warning(f"[RAG_SYSTEM_INIT] Load failed ({e}). Building new.")
                else:
                    index_ready = True

        if not index_ready:
            # Build only when the source directory exists and is non-empty.
            if os.path.exists(source_dir_to_use) and os.listdir(source_dir_to_use):
                rag.build_index_from_source_files(source_dir_to_use)
            else:
                logger.warning("[RAG_SYSTEM_INIT] No sources found. System empty.")

        logger.info("[RAG_SYSTEM_INIT] Complete.")
        return rag

    except Exception as e:
        logger.critical(f"[RAG_SYSTEM_INIT] FATAL: {e}", exc_info=True)
        return None
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==3.1.2
|
| 2 |
+
Flask_Cors==5.0.0
|
| 3 |
+
gdown==5.2.1
|
| 4 |
+
langchain==1.2.10
|
| 5 |
+
langchain_community==0.4.1
|
| 6 |
+
langchain_huggingface==1.2.0
|
| 7 |
+
pandas==3.0.0
|
| 8 |
+
pypdf==6.7.0
|
| 9 |
+
python-dotenv==1.2.1
|
| 10 |
+
python_docx==1.1.2
|
| 11 |
+
sentence_transformers==3.4.0
|
| 12 |
+
torch==2.9.0
|
| 13 |
+
langchain_core
|
| 14 |
+
langchain_text_splitters
|
| 15 |
+
faiss-cpu
|
| 16 |
+
langchain-huggingface
|
sources/vehicle.csv
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Stock Number,Make,Model,Mileage,Registration,Human Description,Status,Location,Fuel,Voi Post Checked,Price
|
| 2 |
+
"428,611",Volkswagen,tiguan,26036,GK72OLN,"Grey 1.4 litre Semi Auto Hybrid Electric Volkswagen tiguan life at 26,036 miles",In Stock,Cannock,Hybrid Electric,TRUE,"19,500"
|
| 3 |
+
"424,986",Volkswagen,tiguan,66303,VE70LDJ,"Black 1.5 litre Semi Auto Petrol Volkswagen tiguan elegance at 66,303 miles",In Stock,Leeds,Petrol,TRUE,"17,700"
|
| 4 |
+
"434,913",Volkswagen,tiguan,14529,DA21BHF,"Grey 1.5 litre Semi Auto Petrol Volkswagen tiguan life at 14,529 miles",In Stock,Camberley,Petrol,TRUE,"20,200"
|
| 5 |
+
"417,721",Vauxhall,corsa,6597,LR73MYB,"Grey 1.2 litre Manual Petrol Vauxhall corsa design at 6,597 miles",In Stock,Sheffield,Petrol,TRUE,"10,500"
|
| 6 |
+
"426,301",Land Rover,discovery sport,46282,LL21DDX,"Blue 1.5 litre Automatic Hybrid Electric Land Rover discovery sport hse at 46,282 miles",In Stock,Leeds,Hybrid Electric,TRUE,"22,600"
|
| 7 |
+
"428,073",Ds,ds,42236,LB71OMC,"Black 1.2 litre Automatic Petrol Ds ds 3 crossback puretech performance line s/s eat8 at 42,236 miles",In Stock,Wimbledon,Petrol,TRUE,"11,800"
|
| 8 |
+
"406,980",Audi,a4,7434,WR73DVW,"Black 2 litre Semi Auto Diesel Audi a4 avant quattro s line black edition mhev at 7,434 miles",In Stock,Leeds,Diesel,TRUE,"31,000"
|
| 9 |
+
"434,265",Bmw,5 series,41102,MJ70PYP,"Grey 2 litre Automatic Electric Diesel Bmw 5 series m sport mhev at 41,102 miles",In Stock,Cannock,Electric Diesel,TRUE,"19,300"
|
| 10 |
+
"422,417",Mg,mg,21471,ST72MHL,"Silver 1.5 litre Manual Petrol Mg mg hs excite at 21,471 miles",In Stock,Norwich,Petrol,TRUE,"12,279"
|
| 11 |
+
"429,882",Nissan,qashqai,69582,LE72CPV,"Grey 1.5 litre Automatic Hybrid Electric Nissan qashqai e-power tekna at 69,582 miles",In Stock,Fengate,Hybrid Electric,TRUE,"17,414"
|
| 12 |
+
"422,146",Volkswagen,tiguan,58117,LM19UNY,"Black 2 litre Manual Diesel Volkswagen tiguan allspace sel at 58,117 miles",In Stock,Fengate,Diesel,TRUE,"17,759"
|
| 13 |
+
"425,521",Volkswagen,polo,32622,YH72EWR,"Blue 1 litre Semi Auto Petrol Volkswagen polo r-line at 32,622 miles",In Stock,Wimbledon,Petrol,TRUE,"16,500"
|
| 14 |
+
"434,451",Volkswagen,polo,28183,SD72HNP,"Grey 1 litre Semi Auto Petrol Volkswagen polo r-line at 28,183 miles",In Stock,Wimbledon,Petrol,TRUE,"16,700"
|
| 15 |
+
"418,382",Mini,hatch,27294,HN73XTE,"Blue/Black 1.5 litre Automatic Petrol Mini hatch cooper exclusive at 27,294 miles",In Stock,Leeds,Petrol,TRUE,"17,800"
|
| 16 |
+
"415,694",Mercedes,cla,37995,KT23HMV,"Grey 1.3 litre Automatic Hybrid Electric Mercedes cla e amg line premium plus night edition at 37,995 miles",Reserved,Cannock,Hybrid Electric,TRUE,"21,100"
|
| 17 |
+
"415,694",Mercedes,cla,37995,KT23HMV,"Grey 1.3 litre Automatic Hybrid Electric Mercedes cla e amg line premium plus night edition at 37,995 miles",Reserved,Cannock,Hybrid Electric,TRUE,"21,100"
|
| 18 |
+
"423,612",Bmw,3 series,16729,LS69JXC,"Black 2 litre Automatic Petrol Bmw 3 series m sport at 16,729 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"22,000"
|
| 19 |
+
"422,684",Mercedes,cla,32207,WA72XRP,"Grey 1.3 litre Automatic Petrol Mercedes cla amg line premium plus night edition at 32,207 miles",Reserved,Sheffield,Petrol,TRUE,"23,300"
|
| 20 |
+
"414,969",Mini,hatch,10644,HN72ZZO,"Black 0 litre Automatic Electric Mini hatch cooper s level 2 at 10,644 miles",In Stock,Camberley,Electric,TRUE,"12,473"
|
| 21 |
+
"431,158",Polestar,polestar,20353,OW72LKK,"Black 0 litre Automatic Electric Polestar polestar 2 base at 20,353 miles",In Stock,Leeds,Electric,TRUE,"17,500"
|
| 22 |
+
"437,101",Toyota,chr,32559,YE72LKF,"Grey 1.8 litre Cvt Hybrid Electric Toyota chr icon at 32,559 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"17,000"
|
| 23 |
+
"421,665",Audi,q2,21699,LT72KJF,"Grey 1.5 litre Semi Auto Petrol Audi q2 sport at 21,699 miles",In Stock,Camberley,Petrol,TRUE,"19,268"
|
| 24 |
+
"428,189",Toyota,corolla,27409,BK72ZZX,"White 1.8 litre Cvt Hybrid Electric Toyota corolla icon tech at 27,409 miles",In Stock,Cannock,Hybrid Electric,TRUE,"18,400"
|
| 25 |
+
"436,789",Audi,a1,39387,SA21WXX,"White 1 litre Semi Auto Petrol Audi a1 sportback sport at 39,387 miles",In Stock,Wimbledon,Petrol,TRUE,"15,200"
|
| 26 |
+
"424,743",Audi,a1,23997,YP71XRT,"Yellow 1 litre Semi Auto Petrol Audi a1 sportback technik at 23,997 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"16,100"
|
| 27 |
+
"433,092",Vauxhall,corsa,6798,DN23UFU,"White 1.2 litre Automatic Petrol Vauxhall corsa gs at 6,798 miles",In Stock,Camberley,Petrol,TRUE,"13,935"
|
| 28 |
+
"424,869",Hyundai,kona,46528,CN70CGY,"White 0 litre Automatic Electric Hyundai kona premium se at 46,528 miles",In Stock,Camberley,Electric,TRUE,"11,800"
|
| 29 |
+
"417,721",Vauxhall,corsa,6597,LR73MYB,"Grey 1.2 litre Manual Petrol Vauxhall corsa design at 6,597 miles",In Stock,Sheffield,Petrol,TRUE,"10,500"
|
| 30 |
+
"417,056",Skoda,kamiq,18462,DG72LOH,"Grey 1 litre Manual Petrol Skoda kamiq se drive tsi at 18,462 miles",In Stock,Cannock,Petrol,TRUE,"12,700"
|
| 31 |
+
"435,085",Tesla,model,57721,YE71XMB,"White 0 litre Automatic Electric Tesla model 3 standard range plus at 57,721 miles",In Stock,Leeds,Electric,TRUE,"14,000"
|
| 32 |
+
"406,286",Audi,a3,34668,HJ72MKG,"Black 1.5 litre Manual Petrol Audi a3 sportback s line mhev at 34,668 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"17,500"
|
| 33 |
+
"436,805",Citroen,grand,28183,LF69SKX,"White 1.2 litre Manual Petrol Citroen grand c4 spacetourer puretech flair s/s at 28,183 miles",In Stock,Fengate,Petrol,TRUE,"12,747"
|
| 34 |
+
"428,265",Hyundai,kona,18880,MK73VCJ,"Grey 0 litre Automatic Electric Hyundai kona ultimate at 18,880 miles",In Stock,Fengate,Electric,TRUE,"17,200"
|
| 35 |
+
"431,595",Peugeot,5008,42709,ST70SZX,"Black 1.2 litre Automatic Petrol Peugeot 5008 puretech s/s allure at 42,709 miles",In Stock,Fengate,Petrol,TRUE,"16,600"
|
| 36 |
+
"419,093",Mg,mg,5607,YS25TEO,"Grey 1.5 litre Automatic Petrol Mg mg hs se dct at 5,607 miles",In Stock,Cannock,Petrol,TRUE,"18,300"
|
| 37 |
+
"431,500",Mercedes,eqc,19751,WN71MWL,"Black 0 litre Automatic Electric Mercedes eqc 4matic amg line premium at 19,751 miles",In Stock,Leeds,Electric,TRUE,"25,300"
|
| 38 |
+
"437,637",Mercedes,a-class,59136,HS71AYV,"White 1.3 litre Automatic Hybrid Electric Mercedes a-class e amg line edition premium at 59,136 miles",In Stock,Camberley,Hybrid Electric,TRUE,"17,200"
|
| 39 |
+
"435,649",Mercedes,a-class,59210,BU70KMZ,"White 1.3 litre Automatic Hybrid Electric Mercedes a-class e amg line premium at 59,210 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"15,500"
|
| 40 |
+
"426,466",Tesla,model,45292,MW72WLE,"Grey 0 litre Automatic Electric Tesla model 3 model 3 at 45,292 miles",In Stock,Cannock,Electric,TRUE,"16,700"
|
| 41 |
+
"437,692",Mg,mg,15493,PK24RUJ,"Grey 0 litre Automatic Electric Mg mg 4 xpower at 15,493 miles",In Stock,Sheffield,Electric,TRUE,"18,700"
|
| 42 |
+
"428,618",Hyundai,ioniq,25750,BL72ETK,"Black 0 litre Automatic Electric Hyundai ioniq 5 premium at 25,750 miles",In Stock,Leeds,Electric,TRUE,"15,900"
|
| 43 |
+
"434,887",Skoda,karoq,28146,DE22MWZ,"Grey 1.5 litre Semi Auto Petrol Skoda karoq se l tsi dsg at 28,146 miles",In Stock,Cannock,Petrol,TRUE,"19,900"
|
| 44 |
+
"432,660",Vauxhall,astra,12295,LS74ZZB,"Red 1.2 litre Automatic Petrol Vauxhall astra gs at 12,295 miles",In Stock,Enfield,Petrol,TRUE,"16,900"
|
| 45 |
+
"433,308",Peugeot,208,23400,DA22KME,"Grey 1.2 litre Automatic Petrol Peugeot 208 puretech gt s/s at 23,400 miles",In Stock,Leeds,Petrol,TRUE,"13,400"
|
| 46 |
+
"425,987",Mercedes,cla,33568,WV22OHF,"White 1.3 litre Automatic Hybrid Electric Mercedes cla e amg line premium at 33,568 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"19,500"
|
| 47 |
+
"420,970",Land Rover,range rover evoque,89584,DT21HBC,"Grey 1.5 litre Automatic Hybrid Electric Land Rover range rover evoque hse at 89,584 miles",In Stock,Leeds,Hybrid Electric,TRUE,"18,500"
|
| 48 |
+
"413,067",Lexus,ux,29066,HF22YFR,"Black 2 litre Cvt Hybrid Electric Lexus ux 250h at 29,066 miles",In Stock,Camberley,Hybrid Electric,TRUE,"18,300"
|
| 49 |
+
"424,353",Lexus,ux,30213,FN71YBT,"Silver 2 litre Cvt Hybrid Electric Lexus ux 250h at 30,213 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"17,900"
|
| 50 |
+
"424,141",Tesla,model,26352,BK72JMX,"Grey 0 litre Automatic Electric Tesla model y long range awd at 26,352 miles",In Stock,Enfield,Electric,TRUE,"23,600"
|
| 51 |
+
"432,935",Tesla,model,47247,LA72FYH,"Black 0 litre Automatic Electric Tesla model y long range awd at 47,247 miles",In Stock,Blue bell hill sales,Electric,TRUE,"21,800"
|
| 52 |
+
"427,271",Audi,q3,55095,BT23CFF,"Blue 1.4 litre Semi Auto Hybrid Electric Audi q3 e technik at 55,095 miles",In Stock,Blue bell hill sales,Hybrid Electric,TRUE,"19,100"
|
| 53 |
+
"429,006",Skoda,enyaq,31121,SD72OER,"Black 0 litre Automatic Electric Skoda enyaq iv 60 at 31,121 miles",In Stock,Leeds,Electric,TRUE,"16,000"
|
| 54 |
+
"427,278",Polestar,polestar,62460,OE72VML,"Silver 0 litre Automatic Electric Polestar polestar 2 base at 62,460 miles",In Stock,Camberley,Electric,TRUE,"15,200"
|
| 55 |
+
"429,046",Kia,niro,51915,ML22XAX,"Blue 0 litre Automatic Electric Kia niro 2 at 51,915 miles",In Stock,Sheffield,Electric,TRUE,"12,400"
|
| 56 |
+
"427,368",Kia,niro,71221,CV71VZX,"Black 0 litre Automatic Electric Kia niro 4 plus at 71,221 miles",In Stock,Leeds,Electric,TRUE,"12,700"
|
| 57 |
+
"430,171",Audi,q3,15596,KY73MXB,"Blue 1.4 litre Semi Auto Hybrid Electric Audi q3 e technik at 15,596 miles",In Stock,Fengate,Hybrid Electric,TRUE,"24,200"
|
| 58 |
+
"426,807",Toyota,yaris,25606,FL70HWJ,"Blue 1.5 litre Cvt Hybrid Electric Toyota yaris design fhev at 25,606 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"14,300"
|
| 59 |
+
"432,503",Volkswagen,id4,26512,YC72CSZ,"Grey 0 litre Automatic Electric Volkswagen id4 life at 26,512 miles",In Stock,Leeds,Electric,TRUE,"16,100"
|
| 60 |
+
"433,391",Volkswagen,id4,37517,ND22UAZ,"Black 0 litre Automatic Electric Volkswagen id4 life at 37,517 miles",In Stock,Cannock,Electric,TRUE,"15,000"
|
| 61 |
+
"432,663",Mercedes,eqc,24994,RX72EVM,"Blue 0 litre Automatic Electric Mercedes eqc 4matic amg line at 24,994 miles",In Stock,Cannock,Electric,TRUE,"23,300"
|
| 62 |
+
"429,592",Mercedes,gle-class,58814,BC19WGD,"Silver 2 litre Automatic Diesel Mercedes gle-class 4matic amg line premium at 58,814 miles",In Stock,Wimbledon,Diesel,TRUE,"29,500"
|
| 63 |
+
"414,421",Mercedes,gle-class,60875,YG69ECR,"Blue 2 litre Automatic Diesel Mercedes gle-class 4matic amg line premium at 60,875 miles",Reserved,BBH MMC,Diesel,TRUE,"28,700"
|
| 64 |
+
"431,695",Hyundai,bayon,27177,OE71AXO,"Bronze 1 litre Manual Hybrid Electric Hyundai bayon t-gdi se connect mhev at 27,177 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"11,100"
|
| 65 |
+
"431,695",Hyundai,bayon,27177,OE71AXO,"Bronze 1 litre Manual Hybrid Electric Hyundai bayon t-gdi se connect mhev at 27,177 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"11,100"
|
| 66 |
+
"408,106",Bmw,1 series,39234,NL69ANX,"White 2 litre Automatic Diesel Bmw 1 series m sport at 39,234 miles",In Stock,Blue bell hill sales,Diesel,TRUE,"16,100"
|
| 67 |
+
"420,797",Bmw,ix3,15944,GF72XST,"Black 0 litre Automatic Electric Bmw ix3 sport pro at 15,944 miles",In Stock,Enfield,Electric,TRUE,"29,500"
|
| 68 |
+
"428,319",Renault,zoe,41533,MT72LHU,"White 0 litre Automatic Electric Renault zoe s edition at 41,533 miles",In Stock,Camberley,Electric,TRUE,"9,200"
|
| 69 |
+
"434,331",Nissan,leaf,36989,YP72GKC,"Grey 0 litre Automatic Electric Nissan leaf tekna at 36,989 miles",In Stock,Camberley,Electric,TRUE,"9,500"
|
| 70 |
+
"423,282",Renault,zoe,30629,MT72LHJ,"White 0 litre Automatic Electric Renault zoe s edition at 30,629 miles",In Stock,Camberley,Electric,TRUE,"9,200"
|
| 71 |
+
"417,597",Peugeot,208,25189,MK72XFC,"Grey 0 litre Automatic Electric Peugeot 208 gt premium at 25,189 miles",Reserved,On site (west malling),Electric,TRUE,"12,300"
|
| 72 |
+
"435,356",Peugeot,208,28412,CP72DEU,"Yellow 0 litre Automatic Electric Peugeot 208 gt at 28,412 miles",In Stock,On site (west malling),Electric,TRUE,"12,300"
|
| 73 |
+
"421,495",Hyundai,i10,13517,GL23RYF,"Blue 1.2 litre Automatic Petrol Hyundai i10 mpi se connect at 13,517 miles",In Stock,Enfield,Petrol,TRUE,"13,000"
|
| 74 |
+
"434,227",Peugeot,208,15677,BL23AWX,"Red 0 litre Automatic Electric Peugeot 208 allure premium plus at 15,677 miles",In Stock,Wimbledon,Electric,TRUE,"12,400"
|
| 75 |
+
"429,976",Bmw,3 series,64079,AF70HTV,"Black 2 litre Automatic Hybrid Electric Bmw 3 series m sport at 64,079 miles",In Stock,Wimbledon,Hybrid Electric,TRUE,"16,800"
|
| 76 |
+
"436,263",Volvo,xc40,31542,GF71BNE,"Black 0 litre Automatic Electric Volvo xc40 recharge plus twin awd at 31,542 miles",In Stock,On site (west malling),Electric,TRUE,"21,800"
|
| 77 |
+
"421,271",Mg,mg,68853,WU72YRD,"Black 1.5 litre Semi Auto Petrol Mg mg hs excite dct at 68,853 miles",In Stock,Fengate,Petrol,TRUE,"11,302"
|
| 78 |
+
"410,642",Mg,mg,9650,EJ73FGK,"White 1 litre Automatic Petrol Mg mg zs exclusive t-gdi at 9,650 miles",In Stock,Fengate,Petrol,TRUE,"13,890"
|
| 79 |
+
"429,671",Mg,mg,28871,YE72OOW,"Red 1.5 litre Manual Petrol Mg mg hs excite at 28,871 miles",In Stock,Leeds,Petrol,TRUE,"12,000"
|
| 80 |
+
"407,014",Hyundai,ioniq,31030,VF21WZX,"White 1.6 litre Semi Auto Hybrid Electric Hyundai ioniq premium se at 31,030 miles",In Stock,Camberley,Hybrid Electric,TRUE,"16,464"
|
| 81 |
+
"429,542",Ford,focus,27445,YM71SZL,"Blue 1 litre Manual Petrol Ford focus st-line x edition mhev at 27,445 miles",In Stock,Leeds,Petrol,TRUE,"13,600"
|
| 82 |
+
"424,525",Ford,fiesta,6728,GL72RJY,"Grey 1 litre Automatic Petrol Ford fiesta titanium x mhev at 6,728 miles",In Stock,Leeds,Petrol,TRUE,"15,000"
|
| 83 |
+
"419,966",Vauxhall,astra,14404,VK73ZTL,"Yellow/Black 1.5 litre Automatic Diesel Vauxhall astra ultimate at 14,404 miles",In Stock,Leeds,Diesel,TRUE,"18,300"
|
| 84 |
+
"436,321",Citroen,c4,7101,AV22OBT,"Blue 1.2 litre Automatic Petrol Citroen c4 puretech sense plus s/s eat8 at 7,101 miles",In Stock,Wimbledon,Petrol,TRUE,"14,000"
|
| 85 |
+
"435,895",Peugeot,5008,57228,MF22NWT,"White 1.6 litre Automatic Petrol Peugeot 5008 puretech s/s gt at 57,228 miles",In Stock,Enfield,Petrol,TRUE,"19,500"
|
| 86 |
+
"427,069",Mercedes,a-class,40877,KT68XSX,"White 2 litre Automatic Petrol Mercedes a-class amg line premium plus at 40,877 miles",Deposit Taken,Leeds,Petrol,TRUE,"17,300"
|
| 87 |
+
"419,827",Citroen,c3,22543,LB72FKJ,"Beige 1.2 litre Manual Petrol Citroen c3 puretech c-series edition s/s at 22,543 miles",In Stock,Enfield,Petrol,TRUE,"9,300"
|
| 88 |
+
"435,029",Vauxhall,corsa,17071,DL73HNT,"Black 1.2 litre Automatic Petrol Vauxhall corsa gs at 17,071 miles",In Stock,Enfield,Petrol,TRUE,"13,400"
|
| 89 |
+
"419,827",Citroen,c3,22543,LB72FKJ,"Beige 1.2 litre Manual Petrol Citroen c3 puretech c-series edition s/s at 22,543 miles",In Stock,Enfield,Petrol,TRUE,"9,300"
|
| 90 |
+
"391,462",Toyota,chr,17725,BMZ6931,"Black 1.8 litre Cvt Hybrid Electric Toyota chr icon at 17,725 miles",In Stock,Cannock,Hybrid Electric,TRUE,"17,300"
|
| 91 |
+
"420,612",Volkswagen,up,20913,YE72XSM,"Black 0 litre Cvt Electric Volkswagen up e-up at 20,913 miles",In Stock,Blue bell hill sales,Electric,TRUE,"10,000"
|
| 92 |
+
"434,213",Bmw,x2,23001,KR72RNE,"Black 2 litre Manual Diesel Bmw x2 sport at 23,001 miles",In Stock,Leeds,Diesel,TRUE,"19,100"
|
| 93 |
+
"429,954",Volkswagen,tiguan,25528,YC23KFX,"Black 1.5 litre Semi Auto Petrol Volkswagen tiguan r-line at 25,528 miles",In Stock,Wimbledon,Petrol,TRUE,"24,800"
|
| 94 |
+
"428,070",Volkswagen,id4,29917,VK72ZSJ,"Grey 0 litre Automatic Electric Volkswagen id4 life at 29,917 miles",In Stock,Leeds,Electric,TRUE,"15,200"
|
| 95 |
+
"430,367",Volkswagen,id4,17211,YF72PNV,"Black 0 litre Automatic Electric Volkswagen id4 life at 17,211 miles",In Stock,Leeds,Electric,TRUE,"17,300"
|
| 96 |
+
"433,393",Bmw,ix3,43841,AO71XPN,"Blue 0 litre Automatic Electric Bmw ix3 edition pro at 43,841 miles",In Stock,Sheffield,Electric,TRUE,"20,900"
|
| 97 |
+
"431,595",Peugeot,5008,42709,ST70SZX,"Black 1.2 litre Automatic Petrol Peugeot 5008 puretech s/s allure at 42,709 miles",In Stock,Fengate,Petrol,TRUE,"16,600"
|
| 98 |
+
"428,914",Volkswagen,tiguan,70589,DE72UHU,"White 1.5 litre Semi Auto Petrol Volkswagen tiguan allspace life at 70,589 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"17,000"
|
| 99 |
+
"400,750",Bmw,3 series,73255,YG71WVL,"Grey 3 litre Automatic Electric Diesel Bmw 3 series xdrive mhev at 73,255 miles",In Stock,Cannock,Electric Diesel,TRUE,"24,200"
|
| 100 |
+
"434,830",Mazda,mx-30,7990,NG72BTF,"Grey 0 litre Automatic Electric Mazda mx-30 gt sport tech at 7,990 miles",In Stock,Enfield,Electric,TRUE,"12,500"
|
| 101 |
+
"436,889",Mercedes,a-class,18900,BX71VOF,"Black 1.3 litre Automatic Petrol Mercedes a-class amg line at 18,900 miles",In Stock,Sheffield,Petrol,TRUE,"18,500"
|
| 102 |
+
"435,098",Bmw,1 series,56931,YG20YUY,"Grey 1.5 litre Manual Petrol Bmw 1 series m sport at 56,931 miles",In Stock,Cannock,Petrol,TRUE,"14,900"
|
| 103 |
+
"425,808",Volvo,xc40,45932,YC72EOJ,"Grey 1.5 litre Automatic Hybrid Electric Volvo xc40 recharge t4 core at 45,932 miles",In Stock,Enfield,Hybrid Electric,TRUE,"18,600"
|
| 104 |
+
"429,263",Polestar,polestar,2850,OW25CEU,"Silver 0 litre Automatic Electric Polestar polestar 4 base at 2,850 miles",In Stock,Enfield,Electric,TRUE,"34,800"
|
| 105 |
+
"427,311",Volvo,xc40,75344,YY72OXG,"Blue 0 litre Automatic Electric Volvo xc40 recharge ultimate at 75,344 miles",In Stock,Wimbledon,Electric,TRUE,"18,600"
|
| 106 |
+
"436,710",Ford,ecosport,24115,FV72XLD,"Silver 1 litre Manual Petrol Ford ecosport active at 24,115 miles",In Stock,On site (west malling),Petrol,TRUE,"11,300"
|
| 107 |
+
"438,257",Toyota,rav-4,13877,YD72NVO,"Grey 2.5 litre Cvt Hybrid Electric Toyota rav-4 vvt-i icon at 13,877 miles",In Stock,On site (west malling),Hybrid Electric,TRUE,"26,100"
|
| 108 |
+
"419,858",Toyota,rav-4,38002,YD72TKC,"Grey/Black 2.5 litre Automatic Hybrid Electric Toyota rav-4 vvt-i dynamic phev at 38,002 miles",In Stock,Enfield,Hybrid Electric,TRUE,"26,500"
|
| 109 |
+
"422,517",Toyota,rav-4,15242,LD72GYJ,"Blue 2.5 litre Cvt Hybrid Electric Toyota rav-4 vvt-i excel at 15,242 miles",In Stock,Enfield,Hybrid Electric,TRUE,"29,000"
|
| 110 |
+
"432,146",Polestar,polestar,29115,OW72LSZ,"Silver 0 litre Automatic Electric Polestar polestar 2 plus at 29,115 miles",In Stock,Sheffield,Electric,TRUE,"20,200"
|
| 111 |
+
"414,972",Audi,a4,40747,BG21SDU,"Grey 2 litre Semi Auto Diesel Audi a4 avant sport edition mhev at 40,747 miles",In Stock,Norwich,Diesel,TRUE,"18,700"
|
| 112 |
+
"434,315",Nissan,ariya,43947,SH72DPK,"Green 0 litre Automatic Electric Nissan ariya evolve at 43,947 miles",In Stock,On site (west malling),Electric,TRUE,"19,800"
|
| 113 |
+
"435,253",Kia,sportage,11532,EN24HCA,"Black 1.6 litre Semi Auto Hybrid Electric Kia sportage gt-line at 11,532 miles",In Stock,On site (west malling),Hybrid Electric,TRUE,"24,600"
|
| 114 |
+
"427,475",Mg,mg,13527,FD72MVG,"Red 1.5 litre Manual Petrol Mg mg hs excite at 13,527 miles",In Stock,Norwich,Petrol,TRUE,"12,874"
|
| 115 |
+
"426,946",Nissan,leaf,5197,YS72FSG,"Grey 0 litre Automatic Electric Nissan leaf acenta at 5,197 miles",In Stock,Enfield,Electric,TRUE,"9,300"
|
| 116 |
+
"430,046",Hyundai,ioniq,6205,BK72CKC,"Blue 0 litre Automatic Electric Hyundai ioniq premium at 6,205 miles",In Stock,Enfield,Electric,TRUE,"12,700"
|
| 117 |
+
"428,541",Land Rover,range rover evoque,46584,YM21BGZ,"Black 2 litre Automatic Diesel Land Rover range rover evoque s mhev at 46,584 miles",Reserved,Cannock,Diesel,TRUE,"20,500"
|
| 118 |
+
"437,158",Jaecoo,7,3648,YH75RJV,"Black 1.5 litre Automatic Hybrid Electric Jaecoo 7 luxury at 3,648 miles",In Stock,Sheffield,Hybrid Electric,TRUE,"28,400"
|
| 119 |
+
"431,927",Hyundai,bayon,14497,AVI4307,"White 1 litre Automatic Hybrid Electric Hyundai bayon t-gdi premium dct mhev at 14,497 miles",In Stock,Enfield,Hybrid Electric,TRUE,"14,900"
|
| 120 |
+
"434,369",Volkswagen,passat,53021,RJ21XLM,"White 1.4 litre Semi Auto Hybrid Electric Volkswagen passat gte dsg at 53,021 miles",In Stock,Camberley,Hybrid Electric,TRUE,"14,553"
|
| 121 |
+
"419,439",Honda,civic,39251,KJ68HHM,"White 1 litre Manual Petrol Honda civic vtec sport line at 39,251 miles",In Stock,Leeds,Petrol,TRUE,"11,300"
|
| 122 |
+
"423,587",Honda,cr-v,15024,FG72VNE,"Grey 2 litre Cvt Hybrid Electric Honda cr-v i-mmd se at 15,024 miles",In Stock,On site (west malling),Hybrid Electric,TRUE,"21,800"
|
| 123 |
+
"417,336",Kia,ceed,16627,RF74GFY,"Red 1.5 litre Manual Petrol Kia ceed gt-line at 16,627 miles",In Stock,Wimbledon,Petrol,TRUE,"16,300"
|
| 124 |
+
"433,857",Lexus,ux,34891,CA22BXC,"Blue 0 litre Automatic Electric Lexus ux 300e at 34,891 miles",In Stock,Sheffield,Electric,TRUE,"13,400"
|
| 125 |
+
"422,334",Volvo,xc60,26980,DA72FKF,"White 2 litre Automatic Hybrid Electric Volvo xc60 recharge t6 plus awd at 26,980 miles",In Stock,Camberley,Hybrid Electric,TRUE,"30,600"
|
| 126 |
+
"424,836",Audi,q3,43347,AK70FKS,"Blue 1.5 litre Semi Auto Petrol Audi q3 s line edition 1 mhev at 43,347 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"22,800"
|
| 127 |
+
"427,097",Bmw,i4,50448,HN72XKS,"Green 0 litre Automatic Electric Bmw i4 at 50,448 miles",In Stock,Leeds,Electric,TRUE,"27,500"
|
| 128 |
+
"431,391",Mg,mg,10468,WA71VPM,"Blue 1 litre Automatic Petrol Mg mg zs excite t-gdi at 10,468 miles",In Stock,Cannock,Petrol,TRUE,"12,700"
|
| 129 |
+
"430,942",Volkswagen,id4,33878,VN72DWO,"Grey 0 litre Automatic Electric Volkswagen id4 life at 33,878 miles",In Stock,Sheffield,Electric,TRUE,"15,000"
|
| 130 |
+
"433,852",Jaguar,f-pace,45232,BV72LTJ,"White 2 litre Automatic Diesel Jaguar f-pace r-dynamic black mhev at 45,232 miles",In Stock,Sheffield,Diesel,TRUE,"26,700"
|
| 131 |
+
"416,622",Volkswagen,id4,20906,VE72UJG,"Blue 0 litre Automatic Electric Volkswagen id4 life edition at 20,906 miles",In Stock,Camberley,Electric,TRUE,"16,100"
|
| 132 |
+
"415,271",Volkswagen,id4,28823,VN72ZDJ,"Grey 0 litre Automatic Electric Volkswagen id4 life at 28,823 miles",In Stock,Camberley,Electric,TRUE,"14,764"
|
| 133 |
+
"420,157",Volkswagen,id4,6890,ND74CWW,"White 0 litre Automatic Electric Volkswagen id4 match pro 4motion at 6,890 miles",In Stock,Camberley,Electric,TRUE,"25,700"
|
| 134 |
+
"424,814",Kia,niro,35474,DP73KZR,"Blue 0 litre Automatic Electric Kia niro 2 at 35,474 miles",In Stock,Enfield,Electric,TRUE,"16,500"
|
| 135 |
+
"420,304",Kia,niro,27186,NJ72LNO,"Red 0 litre Automatic Electric Kia niro 2 at 27,186 miles",In Stock,Enfield,Electric,TRUE,"15,000"
|
| 136 |
+
"421,950",Kia,ev6,48027,MJ71ZWE,"Black 0 litre Automatic Electric Kia ev6 gt-line at 48,027 miles",In Stock,Enfield,Electric,TRUE,"17,800"
|
| 137 |
+
"418,761",Volkswagen,golf,49795,WM21FNV,"White 1.4 litre Semi Auto Hybrid Electric Volkswagen golf gte dsg at 49,795 miles",In Stock,Wimbledon,Hybrid Electric,TRUE,"16,100"
|
| 138 |
+
"429,561",Audi,a5,79959,WV72ULJ,"Black 2 litre Semi Auto Diesel Audi a5 sportback s line mhev at 79,959 miles",In Stock,Leeds,Diesel,TRUE,"19,500"
|
| 139 |
+
"424,056",Mazda,cx-5,35286,PO72NBZ,"White 2 litre Manual Petrol Mazda cx-5 sport edition at 35,286 miles",In Stock,On site (west malling),Petrol,TRUE,"17,200"
|
| 140 |
+
"430,693",Hyundai,tucson,38060,OE72FBO,"Grey 1.6 litre Semi Auto Hybrid Electric Hyundai tucson t-gdi se connect dct mhev at 38,060 miles",In Stock,Cannock,Hybrid Electric,TRUE,"17,700"
|
| 141 |
+
"435,584",Volvo,xc40,51698,YY71KYH,"Silver 0 litre Automatic Electric Volvo xc40 recharge pro twin awd at 51,698 miles",In Stock,Cannock,Electric,TRUE,"19,900"
|
| 142 |
+
"435,584",Volvo,xc40,51698,YY71KYH,"Silver 0 litre Automatic Electric Volvo xc40 recharge pro twin awd at 51,698 miles",In Stock,Cannock,Electric,TRUE,"19,900"
|
| 143 |
+
"426,776",Mercedes,eqc,40305,WJ71VXF,"Grey 0 litre Automatic Electric Mercedes eqc 4matic amg line premium plus at 40,305 miles",In Stock,Blue bell hill sales,Electric,TRUE,"24,600"
|
| 144 |
+
"435,577",Volvo,xc40,54691,YM71HCE,"Black 1.5 litre Automatic Hybrid Electric Volvo xc40 recharge t5 r-design pro at 54,691 miles",In Stock,Cannock,Hybrid Electric,TRUE,"20,600"
|
| 145 |
+
"425,975",Polestar,polestar,50455,OW23KCX,"Silver 0 litre Automatic Electric Polestar polestar 2 base at 50,455 miles",In Stock,Cannock,Electric,TRUE,"16,300"
|
| 146 |
+
"435,303",Mazda,cx-5,43693,SY72WTO,"Blue 2 litre Manual Petrol Mazda cx-5 sport edition at 43,693 miles",In Stock,Leeds,Petrol,TRUE,"17,000"
|
| 147 |
+
"437,137",Peugeot,3008,59000,AP72OFA,"Grey 1.6 litre Automatic Hybrid Electric Peugeot 3008 s/s allure premium plus at 59,000 miles",In Stock,Cannock,Hybrid Electric,TRUE,"13,800"
|
| 148 |
+
"410,052",Polestar,polestar,40706,OU71YMV,"Blue 0 litre Automatic Electric Polestar polestar 2 base at 40,706 miles",In Stock,Blue bell hill sales,Electric,TRUE,"16,700"
|
| 149 |
+
"430,454",Volkswagen,tiguan,31183,VN72OEA,"Black 1.5 litre Manual Petrol Volkswagen tiguan allspace life at 31,183 miles",In Stock,Camberley,Petrol,TRUE,"19,801"
|
| 150 |
+
"430,456",Volkswagen,tiguan,27693,DV72ZNU,"Red 1.5 litre Semi Auto Petrol Volkswagen tiguan allspace life at 27,693 miles",In Stock,Camberley,Petrol,TRUE,"20,500"
|
| 151 |
+
"428,914",Volkswagen,tiguan,70589,DE72UHU,"White 1.5 litre Semi Auto Petrol Volkswagen tiguan allspace life at 70,589 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"17,000"
|
| 152 |
+
"433,789",Seat,tarraco,51271,SO70AEW,"White 1.5 litre Manual Petrol Seat tarraco tsi evo xcellence at 51,271 miles",In Stock,Wimbledon,Petrol,TRUE,"16,300"
|
| 153 |
+
"424,032",Peugeot,208,48525,WU20RFZ,"White 0 litre Automatic Electric Peugeot 208 allure at 48,525 miles",In Stock,Cannock,Electric,TRUE,"9,300"
|
| 154 |
+
"435,824",Volkswagen,t-roc,64732,FE21XLK,"Grey 1.5 litre Semi Auto Petrol Volkswagen t-roc r-line at 64,732 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"16,800"
|
| 155 |
+
"423,996",Volkswagen,id4,25376,VN72PHV,"Grey 0 litre Automatic Electric Volkswagen id4 life at 25,376 miles",In Stock,Sheffield,Electric,TRUE,"14,800"
|
| 156 |
+
"423,996",Volkswagen,id4,25376,VN72PHV,"Grey 0 litre Automatic Electric Volkswagen id4 life at 25,376 miles",In Stock,Sheffield,Electric,TRUE,"14,800"
|
| 157 |
+
"405,627",Polestar,polestar,46107,OV23LZN,"Grey 0 litre Automatic Electric Polestar polestar 2 base at 46,107 miles",In Stock,Camberley,Electric,TRUE,"16,200"
|
| 158 |
+
"430,352",Hyundai,kona,67143,CN70CHZ,"White 0 litre Automatic Electric Hyundai kona premium se at 67,143 miles",In Stock,On site (west malling),Electric,TRUE,"10,500"
|
| 159 |
+
"422,524",Tesla,model,32048,LA72ENK,"White 0 litre Automatic Electric Tesla model y long range awd at 32,048 miles",In Stock,Blue bell hill sales,Electric,TRUE,"22,300"
|
| 160 |
+
"429,225",Volkswagen,golf,26720,MM73NWJ,"Grey 1.5 litre Manual Petrol Volkswagen golf r-line at 26,720 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"19,500"
|
| 161 |
+
"436,585",Toyota,corolla,53906,FY71YTE,"Grey 1.8 litre Cvt Hybrid Electric Toyota corolla design at 53,906 miles",In Stock,Cannock,Hybrid Electric,TRUE,"17,800"
|
| 162 |
+
"425,060",Kia,ceed,7785,MH73ATK,"Grey 1.5 litre Manual Petrol Kia ceed 2 isg at 7,785 miles",In Stock,On site (west malling),Petrol,TRUE,"15,500"
|
| 163 |
+
"434,391",Ford,ka+,66501,AO69TXE,"Silver 1.2 litre Manual Petrol Ford ka+ active at 66,501 miles",Reserved,Norwich,Petrol,TRUE,"6,747"
|
| 164 |
+
"432,976",Vauxhall,crossland,46755,HJ18GMY,"Black 1.2 litre Manual Petrol Vauxhall crossland x se ecotec s/s at 46,755 miles",In Stock,Norwich,Petrol,TRUE,"7,000"
|
| 165 |
+
"431,320",Renault,zoe,69801,YC21BWT,"Black 0 litre Automatic Electric Renault zoe gt line at 69,801 miles",In Stock,Leeds,Electric,TRUE,"7,100"
|
| 166 |
+
"431,320",Renault,zoe,69801,YC21BWT,"Black 0 litre Automatic Electric Renault zoe gt line at 69,801 miles",In Stock,Leeds,Electric,TRUE,"7,100"
|
| 167 |
+
"436,491",Ford,ecosport,71498,YV19DYM,"Blue 1 litre Manual Petrol Ford ecosport zetec at 71,498 miles",In Stock,Norwich,Petrol,TRUE,"7,109"
|
| 168 |
+
"431,975",Hyundai,kona,65160,DS71HWT,"Grey 0 litre Automatic Electric Hyundai kona premium at 65,160 miles",In Stock,Wimbledon,Electric,TRUE,"11,500"
|
| 169 |
+
"416,558",Mercedes,c-class,39484,YB18UEL,"Black 2.1 litre Automatic Diesel Mercedes c-class 4matic amg line premium plus at 39,484 miles",In Stock,Sheffield,Diesel,TRUE,"17,100"
|
| 170 |
+
"430,352",Hyundai,kona,67143,CN70CHZ,"White 0 litre Automatic Electric Hyundai kona premium se at 67,143 miles",In Stock,On site (west malling),Electric,TRUE,"10,500"
|
| 171 |
+
"427,035",Hyundai,kona,49543,CN70CGO,"White 0 litre Automatic Electric Hyundai kona premium se at 49,543 miles",In Stock,Enfield,Electric,TRUE,"11,700"
|
| 172 |
+
"431,314",Nissan,qashqai,30426,YB71FJP,"Blue 1.3 litre Cvt Hybrid Electric Nissan qashqai dig-t tekna dct mhev at 30,426 miles",In Stock,Leeds,Hybrid Electric,TRUE,"17,100"
|
| 173 |
+
"429,722",Nissan,qashqai,46244,DL73ZRX,"Grey 1.3 litre Cvt Hybrid Electric Nissan qashqai dig-t n-connecta dct mhev at 46,244 miles",In Stock,Cannock,Hybrid Electric,TRUE,"15,600"
|
| 174 |
+
"417,290",Volvo,xc40,22430,DF21GUE,"Silver 2 litre Automatic Hybrid Electric Volvo xc40 b4 r-design pro mhev at 22,430 miles",In Stock,Norwich,Hybrid Electric,TRUE,"23,778"
|
| 175 |
+
"414,558",Volkswagen,t-roc,35579,PL23JTO,"Blue 1 litre Manual Petrol Volkswagen t-roc life at 35,579 miles",In Stock,Norwich,Petrol,TRUE,"15,200"
|
| 176 |
+
"430,354",Bmw,x3,59821,FL23DLX,"Grey 2 litre Automatic Hybrid Electric Bmw x3 m sport at 59,821 miles",In Stock,Blue bell hill sales,Hybrid Electric,TRUE,"29,500"
|
| 177 |
+
"421,976",Tesla,model,14776,LB72SXC,"White 0 litre Automatic Electric Tesla model 3 model 3 at 14,776 miles",In Stock,Blue bell hill sales,Electric,TRUE,"18,300"
|
| 178 |
+
"420,814",Tesla,model,31833,GF71WGU,"White 0 litre Automatic Electric Tesla model 3 standard range plus at 31,833 miles",In Stock,Blue bell hill sales,Electric,TRUE,"15,300"
|
| 179 |
+
"432,303",Vauxhall,mokka,31466,LY72DWU,"Grey 1.2 litre Automatic Petrol Vauxhall mokka gs line at 31,466 miles",In Stock,Leeds,Petrol,TRUE,"14,075"
|
| 180 |
+
"436,326",Vauxhall,mokka,14462,LF72KJH,"Black 1.2 litre Automatic Petrol Vauxhall mokka ultimate at 14,462 miles",In Stock,Leeds,Petrol,TRUE,"15,700"
|
| 181 |
+
"435,853",Bmw,1 series,37601,YK68ZWR,"Black 2 litre Automatic Petrol Bmw 1 series m sport shadow edition at 37,601 miles",In Stock,Norwich,Petrol,TRUE,"15,627"
|
| 182 |
+
"434,842",Seat,ateca,56834,YR20TCY,"Brown 1.6 litre Semi Auto Diesel Seat ateca tdi xcellence lux dsg at 56,834 miles",In Stock,Sheffield,Diesel,TRUE,"14,200"
|
| 183 |
+
"428,141",Toyota,aygo,38467,WH73HFK,"Grey 1 litre Automatic Petrol Toyota aygo x vvt-i undercover at 38,467 miles",Reserved,Enfield,Petrol,TRUE,"13,900"
|
| 184 |
+
"437,563",Bmw,ix3,31678,NU72KWM,"Grey 0 litre Automatic Electric Bmw ix3 sport at 31,678 miles",In Stock,Fengate,Electric,TRUE,"23,800"
|
| 185 |
+
"424,440",Ford,puma,42007,DS72UGR,"White 1 litre Manual Petrol Ford puma st-line vignale mhev at 42,007 miles",In Stock,Enfield,Petrol,TRUE,"13,800"
|
| 186 |
+
"430,625",Audi,q3,19464,SH22FYP,"White 1.5 litre Semi Auto Petrol Audi q3 technik mhev at 19,464 miles",In Stock,Sheffield,Petrol,TRUE,"21,200"
|
| 187 |
+
"434,260",Peugeot,3008,31050,VE71MYK,"Blue 1.2 litre Automatic Petrol Peugeot 3008 puretech s/s allure premium at 31,050 miles",In Stock,Sheffield,Petrol,TRUE,"15,000"
|
| 188 |
+
"415,768",Mg,mg,21332,VK73HPJ,"Red 0 litre Automatic Electric Mg mg 4 se at 21,332 miles",In Stock,Wimbledon,Electric,TRUE,"12,300"
|
| 189 |
+
"435,311",Nissan,juke,18034,KY72CMV,"Silver 1.6 litre Cvt Hybrid Electric Nissan juke n-connecta at 18,034 miles",In Stock,Camberley,Hybrid Electric,TRUE,"14,800"
|
| 190 |
+
"437,522",Mercedes,cla,54927,RO20WUX,"White 1.3 litre Semi Auto Petrol Mercedes cla amg line at 54,927 miles",In Stock,Sheffield,Petrol,TRUE,"17,200"
|
| 191 |
+
"431,798",Tesla,model,51704,BD72WXG,"Grey 0 litre Automatic Electric Tesla model y long range awd at 51,704 miles",In Stock,Leeds,Electric,TRUE,"21,300"
|
| 192 |
+
"433,916",Mg,mg,54038,FP72VPW,"Blue 1.5 litre Manual Petrol Mg mg zs excite vti-tech at 54,038 miles",In Stock,Sheffield,Petrol,TRUE,"9,900"
|
| 193 |
+
"428,998",Ford,ecosport,17541,YS70FRX,"Silver 1 litre Manual Petrol Ford ecosport st-line at 17,541 miles",In Stock,Sheffield,Petrol,TRUE,"11,100"
|
| 194 |
+
"425,060",Kia,ceed,7785,MH73ATK,"Grey 1.5 litre Manual Petrol Kia ceed 2 isg at 7,785 miles",In Stock,On site (west malling),Petrol,TRUE,"15,500"
|
| 195 |
+
"433,633",Kia,sportage,39085,FL70KFZ,"Red 1.6 litre Manual Petrol Kia sportage 2 isg at 39,085 miles",In Stock,Camberley,Petrol,TRUE,"14,008"
|
| 196 |
+
"427,577",Skoda,octavia,33416,BL71RYB,"White 1.4 litre Semi Auto Hybrid Electric Skoda octavia se l tsi dsg at 33,416 miles",In Stock,On site (west malling),Hybrid Electric,TRUE,"17,100"
|
| 197 |
+
"427,577",Skoda,octavia,33416,BL71RYB,"White 1.4 litre Semi Auto Hybrid Electric Skoda octavia se l tsi dsg at 33,416 miles",In Stock,On site (west malling),Hybrid Electric,TRUE,"17,100"
|
| 198 |
+
"431,312",Toyota,corolla,50533,YP70TVM,"Black 1.8 litre Cvt Hybrid Electric Toyota corolla design at 50,533 miles",In Stock,On site (west malling),Hybrid Electric,TRUE,"15,400"
|
| 199 |
+
"421,793",Tesla,model,36421,LA72EPZ,"White 0 litre Automatic Electric Tesla model 3 model 3 at 36,421 miles",In Stock,Camberley,Electric,TRUE,"17,100"
|
| 200 |
+
"436,373",Citroen,c5,42255,MM69AYO,"Red 1.6 litre Automatic Petrol Citroen c5 aircross puretech flair plus s/s eat8 at 42,255 miles",In Stock,On site (west malling),Petrol,TRUE,"13,000"
|
| 201 |
+
"423,720",Vauxhall,corsa,21653,VE20PXC,"Red 1.2 litre Manual Petrol Vauxhall corsa se at 21,653 miles",In Stock,On site (west malling),Petrol,TRUE,"9,300"
|
| 202 |
+
"423,686",Hyundai,ioniq,59659,FL71YSY,"Grey 0 litre Automatic Electric Hyundai ioniq 5 ultimate at 59,659 miles",In Stock,Sheffield,Electric,TRUE,"15,600"
|
| 203 |
+
"435,301",Hyundai,bayon,54746,WM72LJC,"Turquoise 1 litre Automatic Hybrid Electric Hyundai bayon t-gdi premium dct mhev at 54,746 miles",In Stock,Camberley,Hybrid Electric,TRUE,"12,044"
|
| 204 |
+
"435,301",Hyundai,bayon,54746,WM72LJC,"Turquoise 1 litre Automatic Hybrid Electric Hyundai bayon t-gdi premium dct mhev at 54,746 miles",In Stock,Camberley,Hybrid Electric,TRUE,"12,044"
|
| 205 |
+
"436,373",Citroen,c5,42255,MM69AYO,"Red 1.6 litre Automatic Petrol Citroen c5 aircross puretech flair plus s/s eat8 at 42,255 miles",In Stock,On site (west malling),Petrol,TRUE,"13,000"
|
| 206 |
+
"437,184",Mazda,cx-30,59945,RO71OSB,"Red 2 litre Manual Petrol Mazda cx-30 gt sport mhev at 59,945 miles",In Stock,Camberley,Petrol,TRUE,"12,800"
|
| 207 |
+
"431,914",Mazda,mx-30,11459,AY72KNB,"Grey 0 litre Automatic Electric Mazda mx-30 prime-line at 11,459 miles",In Stock,Fengate,Electric,TRUE,"10,149"
|
| 208 |
+
"415,919",Volkswagen,golf,12913,GF72OPT,"Grey 1.4 litre Semi Auto Hybrid Electric Volkswagen golf style at 12,913 miles",In Stock,Enfield,Hybrid Electric,TRUE,"18,100"
|
| 209 |
+
"428,067",Nissan,leaf,40409,LS71XGA,"Grey 0 litre Automatic Electric Nissan leaf n-connecta at 40,409 miles",In Stock,Leeds,Electric,TRUE,"8,300"
|
| 210 |
+
"426,352",Nissan,leaf,22799,LN20TXJ,"Black 0 litre Automatic Electric Nissan leaf tekna at 22,799 miles",In Stock,Leeds,Electric,TRUE,"8,800"
|
| 211 |
+
"431,874",Omoda,5,5054,DA25YKR,"Red 0 litre Automatic Electric Omoda 5 noble at 5,054 miles",In Stock,Leeds,Electric,TRUE,"21,500"
|
| 212 |
+
"434,150",Volkswagen,golf,31706,MW72BFP,"Grey 1.5 litre Manual Petrol Volkswagen golf style at 31,706 miles",In Stock,Leeds,Petrol,TRUE,"17,500"
|
| 213 |
+
"434,422",Volkswagen,golf,10773,LM70YBG,"Black 1.5 litre Semi Auto Hybrid Electric Volkswagen golf life etsi dsg at 10,773 miles",In Stock,Leeds,Hybrid Electric,TRUE,"17,300"
|
| 214 |
+
"435,747",Audi,q2,36636,BK22ADO,"Grey 1 litre Manual Petrol Audi q2 sport at 36,636 miles",In Stock,Wimbledon,Petrol,TRUE,"16,200"
|
| 215 |
+
"437,763",Fiat,500e,27173,ML22OKP,"Blue 0 litre Automatic Electric Fiat 500e passion at 27,173 miles",In Stock,Camberley,Electric,TRUE,"10,700"
|
| 216 |
+
"438,887",Suzuki,sx4,39525,PY72JFJ,"Blue 1.4 litre Manual Hybrid Electric Suzuki sx4 s-cross motion boosterjet mhev at 39,525 miles",In Stock,Enfield,Hybrid Electric,TRUE,"12,100"
|
| 217 |
+
"430,437",Suzuki,sx4,20095,SH72KWC,"Blue 1.4 litre Manual Hybrid Electric Suzuki sx4 s-cross motion boosterjet mhev at 20,095 miles",In Stock,Enfield,Hybrid Electric,TRUE,"13,100"
|
| 218 |
+
"431,601",Suzuki,swace,14565,SC72HXO,"White 1.8 litre Cvt Hybrid Electric Suzuki swace sz-t at 14,565 miles",In Stock,Cannock,Hybrid Electric,TRUE,"16,775"
|
| 219 |
+
"428,934",Suzuki,swace,18503,WK72HKF,"Silver 1.8 litre Cvt Hybrid Electric Suzuki swace sz5 at 18,503 miles",In Stock,Leeds,Hybrid Electric,TRUE,"17,000"
|
| 220 |
+
"403,095",Audi,q4,513,FL24XZM,Blue 0 litre Automatic Electric Audi q4 black edition at 513 miles,In Stock,Blue bell hill sales,Electric,TRUE,"30,800"
|
| 221 |
+
"434,672",Suzuki,swace,19991,DV72RYA,"Blue 1.8 litre Cvt Hybrid Electric Suzuki swace sz5 at 19,991 miles",In Stock,Norwich,Hybrid Electric,TRUE,"17,544"
|
| 222 |
+
"428,928",Volkswagen,t-roc,34184,GV72KYJ,"White 1.5 litre Semi Auto Petrol Volkswagen t-roc life at 34,184 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"17,800"
|
| 223 |
+
"430,248",Audi,q3,52628,MJ21XDW,"Blue 1.5 litre Manual Petrol Audi q3 sportback s line at 52,628 miles",In Stock,Camberley,Petrol,TRUE,"19,500"
|
| 224 |
+
"436,344",Audi,q3,3232,YO21FDV,"Black 2 litre Manual Diesel Audi q3 s line black edition at 3,232 miles",In Stock,Camberley,Diesel,TRUE,"25,931"
|
| 225 |
+
"431,431",Audi,q3,21464,RE72MZV,"Black 1.5 litre Semi Auto Petrol Audi q3 sportback sport mhev at 21,464 miles",In Stock,Camberley,Petrol,TRUE,"25,700"
|
| 226 |
+
"427,958",Volkswagen,t-roc,10358,RA73MGE,"Grey 1.5 litre Semi Auto Petrol Volkswagen t-roc r-line at 10,358 miles",In Stock,Sheffield,Petrol,TRUE,"24,100"
|
| 227 |
+
"429,165",Jaguar,f-pace,59504,OW72TZS,"White 2 litre Automatic Hybrid Electric Jaguar f-pace r-dynamic black at 59,504 miles",In Stock,Camberley,Hybrid Electric,TRUE,"28,500"
|
| 228 |
+
"434,269",Jaguar,f-pace,53220,FX22TPF,"Black 2 litre Automatic Diesel Jaguar f-pace r-dynamic s mhev at 53,220 miles",In Stock,Fengate,Diesel,TRUE,"23,984"
|
| 229 |
+
"430,137",Land Rover,discovery sport,68288,VE22NNG,"Silver 1.5 litre Automatic Hybrid Electric Land Rover discovery sport hse phev at 68,288 miles",In Stock,Blue bell hill sales,Hybrid Electric,TRUE,"21,500"
|
| 230 |
+
"421,747",Audi,q3,15943,PA21UTC,"Black 1.5 litre Semi Auto Petrol Audi q3 s line mhev at 15,943 miles",In Stock,Enfield,Petrol,TRUE,"23,200"
|
| 231 |
+
"423,064",Audi,q3,22135,LV71TUO,"Grey 1.5 litre Semi Auto Petrol Audi q3 s line mhev at 22,135 miles",In Stock,Sheffield,Petrol,TRUE,"23,300"
|
| 232 |
+
"433,494",Audi,q3,18516,LL22TMU,"Grey 1.5 litre Semi Auto Petrol Audi q3 s line mhev at 18,516 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"25,300"
|
| 233 |
+
"438,289",Nissan,qashqai,51288,VO69NWJ,"Silver 1.5 litre Manual Diesel Nissan qashqai dci acenta premium at 51,288 miles",In Stock,Cannock,Diesel,TRUE,"10,000"
|
| 234 |
+
"435,570",Vauxhall,crossland,27526,SA70MKX,"Black 1.2 litre Automatic Petrol Vauxhall crossland x elite nav at 27,526 miles",In Stock,Fengate,Petrol,TRUE,"10,969"
|
| 235 |
+
"422,404",Vauxhall,crossland,35723,AX70DNV,"Grey 1.2 litre Automatic Petrol Vauxhall crossland elite nav at 35,723 miles",Reserved,Norwich,Petrol,TRUE,"10,998"
|
| 236 |
+
"436,022",Citroen,c3,27576,WR21BXX,"Grey 1.2 litre Automatic Petrol Citroen c3 puretech shine plus s/s eat6 at 27,576 miles",In Stock,Fengate,Petrol,TRUE,"11,025"
|
| 237 |
+
"428,611",Volkswagen,tiguan,26036,GK72OLN,"Grey 1.4 litre Semi Auto Hybrid Electric Volkswagen tiguan life at 26,036 miles",In Stock,Cannock,Hybrid Electric,TRUE,"19,500"
|
| 238 |
+
"435,135",Volkswagen,tiguan,23491,VE72OWA,"White 1.5 litre Semi Auto Petrol Volkswagen tiguan r-line at 23,491 miles",Reserved,Cannock,Petrol,TRUE,"24,900"
|
| 239 |
+
"434,491",Ford,kuga,69411,BD68VXV,"White 1.5 litre Manual Petrol Ford kuga st-line at 69,411 miles",In Stock,Fengate,Petrol,TRUE,"9,500"
|
| 240 |
+
"432,976",Vauxhall,crossland,46755,HJ18GMY,"Black 1.2 litre Manual Petrol Vauxhall crossland x se ecotec s/s at 46,755 miles",In Stock,Norwich,Petrol,TRUE,"7,000"
|
| 241 |
+
"436,491",Ford,ecosport,71498,YV19DYM,"Blue 1 litre Manual Petrol Ford ecosport zetec at 71,498 miles",In Stock,Norwich,Petrol,TRUE,"7,109"
|
| 242 |
+
"434,646",Citroen,c5,55142,NL72UHZ,"White 1.6 litre Automatic Petrol Citroen c5 x puretech shine plus s/s eat8 at 55,142 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"13,200"
|
| 243 |
+
"434,138",Kia,rio,13618,NG71BRV,"Silver 1.2 litre Manual Petrol Kia rio 1 at 13,618 miles",In Stock,Wimbledon,Petrol,TRUE,"11,100"
|
| 244 |
+
"435,658",Kia,rio,24383,FD71TEU,"White 1.2 litre Manual Petrol Kia rio 1 at 24,383 miles",In Stock,On site (west malling),Petrol,TRUE,"10,500"
|
| 245 |
+
"435,719",Kia,rio,31115,VA21WGZ,"Grey 1 litre Semi Auto Petrol Kia rio 2 at 31,115 miles",In Stock,Cannock,Petrol,TRUE,"11,900"
|
| 246 |
+
"431,166",Volkswagen,id3,14603,MW72URX,"White 0 litre Automatic Electric Volkswagen id3 life at 14,603 miles",In Stock,Leeds,Electric,TRUE,"13,400"
|
| 247 |
+
"432,165",Seat,ibiza,13365,YG73CLW,"Black 1 litre Semi Auto Petrol Seat ibiza tsi xcellence dsg at 13,365 miles",In Stock,Enfield,Petrol,TRUE,"16,300"
|
| 248 |
+
"437,354",Volkswagen,golf,25363,CA22MVP,"Black 2 litre Semi Auto Petrol Volkswagen golf r at 25,363 miles",In Stock,Camberley,Petrol,TRUE,"30,700"
|
| 249 |
+
"433,268",Volkswagen,golf,31145,WO73VOA,"White 2 litre Semi Auto Petrol Volkswagen golf r at 31,145 miles",In Stock,Camberley,Petrol,TRUE,"29,200"
|
| 250 |
+
"433,268",Volkswagen,golf,31145,WO73VOA,"White 2 litre Semi Auto Petrol Volkswagen golf r at 31,145 miles",In Stock,Camberley,Petrol,TRUE,"29,200"
|
| 251 |
+
"402,004",Volkswagen,golf,17910,YD73OCR,"White 2 litre Semi Auto Petrol Volkswagen golf r at 17,910 miles",In Stock,Wimbledon,Petrol,TRUE,"29,700"
|
| 252 |
+
"422,216",Volkswagen,golf,18499,AK23BBV,"White 2 litre Semi Auto Petrol Volkswagen golf r at 18,499 miles",In Stock,Cannock,Petrol,TRUE,"31,200"
|
| 253 |
+
"432,499",Volkswagen,golf,23142,RA23YPT,"White 2 litre Semi Auto Petrol Volkswagen golf r at 23,142 miles",In Stock,Wimbledon,Petrol,TRUE,"31,300"
|
| 254 |
+
"422,482",Volkswagen,golf,11005,VK73ZYY,"White 2 litre Semi Auto Petrol Volkswagen golf r at 11,005 miles",In Stock,Camberley,Petrol,TRUE,"31,900"
|
| 255 |
+
"432,169",Volkswagen,golf,19712,VE73TFN,"Black 2 litre Semi Auto Petrol Volkswagen golf r at 19,712 miles",In Stock,Cannock,Petrol,TRUE,"32,000"
|
| 256 |
+
"437,672",Lexus,ux,17341,KM72UYA,"Black 0 litre Automatic Electric Lexus ux 300e at 17,341 miles",In Stock,Enfield,Electric,TRUE,"15,500"
|
| 257 |
+
"429,017",Toyota,chr,39944,BG19VEO,"White 1.8 litre Cvt Hybrid Electric Toyota chr excel at 39,944 miles",In Stock,Leeds,Hybrid Electric,TRUE,"15,800"
|
| 258 |
+
"436,955",Mercedes,cla,40484,WM67EKP,"Black 1.6 litre Semi Auto Petrol Mercedes cla amg line at 40,484 miles",In Stock,Norwich,Petrol,TRUE,"15,221"
|
| 259 |
+
"432,747",Ford,fiesta,21173,SY70UNW,"Blue 1 litre Manual Petrol Ford fiesta active x edition at 21,173 miles",In Stock,Cannock,Petrol,TRUE,"11,900"
|
| 260 |
+
"437,188",Bmw,2 series,25811,HK22UGG,"Black 2 litre Automatic Petrol Bmw 2 series m sport gran coupe at 25,811 miles",In Stock,Blue bell hill sales,Petrol,TRUE,"21,500"
|
| 261 |
+
"438,021",Mercedes,e-class,30300,WH19XTG,"Grey 2 litre Automatic Diesel Mercedes e-class amg line at 30,300 miles",Reserved,Norwich,Diesel,TRUE,"19,700"
|
| 262 |
+
"434,431",Hyundai,i10,28878,GY72NTM,"Black 1.2 litre Automatic Petrol Hyundai i10 mpi premium at 28,878 miles",In Stock,On site (west malling),Petrol,TRUE,"12,600"
|
templates/chat-bot.html
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>RAG Admin Console</title>
|
| 7 |
+
<link href="https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Sora:wght@300;400;500;600&display=swap" rel="stylesheet">
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--bg: #0b0d11;
|
| 11 |
+
--surface: #12151c;
|
| 12 |
+
--border: rgba(255,255,255,0.07);
|
| 13 |
+
--border-hi: rgba(99,210,255,0.35);
|
| 14 |
+
--text: #e8ecf4;
|
| 15 |
+
--muted: #5a6070;
|
| 16 |
+
--accent: #63d2ff;
|
| 17 |
+
--accent-dk: #3ab8e8;
|
| 18 |
+
--success: #3dffa0;
|
| 19 |
+
--danger: #ff5c72;
|
| 20 |
+
--warn: #ffc94a;
|
| 21 |
+
--glow: 0 0 24px rgba(99,210,255,0.12);
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 25 |
+
|
| 26 |
+
body {
|
| 27 |
+
font-family: 'Sora', sans-serif;
|
| 28 |
+
background: var(--bg);
|
| 29 |
+
color: var(--text);
|
| 30 |
+
min-height: 100vh;
|
| 31 |
+
display: flex;
|
| 32 |
+
align-items: center;
|
| 33 |
+
justify-content: center;
|
| 34 |
+
padding: 24px;
|
| 35 |
+
background-image:
|
| 36 |
+
linear-gradient(rgba(99,210,255,0.025) 1px, transparent 1px),
|
| 37 |
+
linear-gradient(90deg, rgba(99,210,255,0.025) 1px, transparent 1px);
|
| 38 |
+
background-size: 48px 48px;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
/* Card */
|
| 42 |
+
.card {
|
| 43 |
+
background: var(--surface);
|
| 44 |
+
border: 1px solid var(--border);
|
| 45 |
+
border-radius: 16px;
|
| 46 |
+
width: 100%;
|
| 47 |
+
max-width: 420px;
|
| 48 |
+
overflow: hidden;
|
| 49 |
+
box-shadow: 0 32px 64px rgba(0,0,0,0.5), var(--glow);
|
| 50 |
+
animation: rise 0.5s cubic-bezier(0.22,1,0.36,1) both;
|
| 51 |
+
}
|
| 52 |
+
@keyframes rise {
|
| 53 |
+
from { opacity:0; transform: translateY(20px); }
|
| 54 |
+
to { opacity:1; transform: translateY(0); }
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
/* Header */
|
| 58 |
+
.card-header {
|
| 59 |
+
padding: 26px 30px 22px;
|
| 60 |
+
border-bottom: 1px solid var(--border);
|
| 61 |
+
display: flex;
|
| 62 |
+
align-items: center;
|
| 63 |
+
gap: 14px;
|
| 64 |
+
}
|
| 65 |
+
.logo-mark {
|
| 66 |
+
width: 38px; height: 38px;
|
| 67 |
+
border-radius: 10px;
|
| 68 |
+
background: linear-gradient(135deg, #1a3a50, #0d2236);
|
| 69 |
+
border: 1px solid var(--border-hi);
|
| 70 |
+
display: flex; align-items: center; justify-content: center;
|
| 71 |
+
flex-shrink: 0;
|
| 72 |
+
}
|
| 73 |
+
.logo-mark svg { width: 20px; height: 20px; }
|
| 74 |
+
.header-text h1 {
|
| 75 |
+
font-size: 15px;
|
| 76 |
+
font-weight: 600;
|
| 77 |
+
letter-spacing: 0.01em;
|
| 78 |
+
color: var(--text);
|
| 79 |
+
}
|
| 80 |
+
.header-text p {
|
| 81 |
+
font-size: 12px;
|
| 82 |
+
color: var(--muted);
|
| 83 |
+
margin-top: 2px;
|
| 84 |
+
font-weight: 300;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/* Status Row */
|
| 88 |
+
.status-row {
|
| 89 |
+
padding: 14px 30px;
|
| 90 |
+
border-bottom: 1px solid var(--border);
|
| 91 |
+
display: flex;
|
| 92 |
+
align-items: center;
|
| 93 |
+
justify-content: space-between;
|
| 94 |
+
}
|
| 95 |
+
.status-label { font-size: 11px; color: var(--muted); font-family: 'DM Mono', monospace; letter-spacing: 0.06em; text-transform: uppercase; }
|
| 96 |
+
.pill {
|
| 97 |
+
display: inline-flex; align-items: center; gap: 7px;
|
| 98 |
+
padding: 5px 11px;
|
| 99 |
+
border-radius: 100px;
|
| 100 |
+
font-size: 11px; font-weight: 500;
|
| 101 |
+
font-family: 'DM Mono', monospace;
|
| 102 |
+
letter-spacing: 0.04em;
|
| 103 |
+
background: rgba(61,255,160,0.08);
|
| 104 |
+
color: var(--success);
|
| 105 |
+
border: 1px solid rgba(61,255,160,0.2);
|
| 106 |
+
transition: all 0.3s;
|
| 107 |
+
}
|
| 108 |
+
.pill.error {
|
| 109 |
+
background: rgba(255,92,114,0.08);
|
| 110 |
+
color: var(--danger);
|
| 111 |
+
border-color: rgba(255,92,114,0.2);
|
| 112 |
+
}
|
| 113 |
+
.pill.loading {
|
| 114 |
+
background: rgba(255,201,74,0.08);
|
| 115 |
+
color: var(--warn);
|
| 116 |
+
border-color: rgba(255,201,74,0.2);
|
| 117 |
+
}
|
| 118 |
+
.pill-dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
|
| 119 |
+
.pill-dot.pulse { animation: pulse 1.8s ease-in-out infinite; }
|
| 120 |
+
@keyframes pulse {
|
| 121 |
+
0%,100% { opacity:1; transform: scale(1); }
|
| 122 |
+
50% { opacity:0.4; transform: scale(0.7); }
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
/* Body */
|
| 126 |
+
.card-body { padding: 26px 30px 30px; }
|
| 127 |
+
|
| 128 |
+
/* Fields */
|
| 129 |
+
.field { margin-bottom: 14px; }
|
| 130 |
+
.field label {
|
| 131 |
+
display: block;
|
| 132 |
+
font-size: 10px; font-weight: 500;
|
| 133 |
+
color: var(--muted);
|
| 134 |
+
margin-bottom: 7px;
|
| 135 |
+
letter-spacing: 0.09em;
|
| 136 |
+
text-transform: uppercase;
|
| 137 |
+
font-family: 'DM Mono', monospace;
|
| 138 |
+
}
|
| 139 |
+
.field input {
|
| 140 |
+
width: 100%;
|
| 141 |
+
padding: 10px 14px;
|
| 142 |
+
background: rgba(255,255,255,0.04);
|
| 143 |
+
border: 1px solid var(--border);
|
| 144 |
+
border-radius: 8px;
|
| 145 |
+
color: var(--text);
|
| 146 |
+
font-size: 14px;
|
| 147 |
+
font-family: 'Sora', sans-serif;
|
| 148 |
+
transition: border-color 0.2s, box-shadow 0.2s;
|
| 149 |
+
outline: none;
|
| 150 |
+
}
|
| 151 |
+
.field input::placeholder { color: var(--muted); opacity: 0.55; }
|
| 152 |
+
.field input:focus {
|
| 153 |
+
border-color: var(--border-hi);
|
| 154 |
+
box-shadow: 0 0 0 3px rgba(99,210,255,0.07);
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
/* Buttons */
|
| 158 |
+
.btn {
|
| 159 |
+
width: 100%;
|
| 160 |
+
padding: 11px 16px;
|
| 161 |
+
border: none;
|
| 162 |
+
border-radius: 9px;
|
| 163 |
+
font-family: 'Sora', sans-serif;
|
| 164 |
+
font-size: 13px; font-weight: 500;
|
| 165 |
+
cursor: pointer;
|
| 166 |
+
transition: all 0.2s;
|
| 167 |
+
display: flex; align-items: center; justify-content: center; gap: 8px;
|
| 168 |
+
letter-spacing: 0.02em;
|
| 169 |
+
margin-top: 8px;
|
| 170 |
+
}
|
| 171 |
+
.btn:active { transform: scale(0.98); }
|
| 172 |
+
.btn svg { flex-shrink: 0; }
|
| 173 |
+
|
| 174 |
+
.btn-primary { background: var(--accent); color: #06111a; }
|
| 175 |
+
.btn-primary:hover { background: var(--accent-dk); box-shadow: 0 0 20px rgba(99,210,255,0.22); }
|
| 176 |
+
|
| 177 |
+
.btn-ghost {
|
| 178 |
+
background: rgba(255,255,255,0.04);
|
| 179 |
+
color: var(--text);
|
| 180 |
+
border: 1px solid var(--border);
|
| 181 |
+
}
|
| 182 |
+
.btn-ghost:hover { background: rgba(255,255,255,0.07); border-color: rgba(255,255,255,0.12); }
|
| 183 |
+
|
| 184 |
+
.btn-danger-soft {
|
| 185 |
+
background: rgba(255,92,114,0.07);
|
| 186 |
+
color: var(--danger);
|
| 187 |
+
border: 1px solid rgba(255,92,114,0.18);
|
| 188 |
+
}
|
| 189 |
+
.btn-danger-soft:hover { background: rgba(255,92,114,0.13); }
|
| 190 |
+
|
| 191 |
+
/* Section labels */
|
| 192 |
+
.section-label {
|
| 193 |
+
font-size: 10px; font-weight: 500;
|
| 194 |
+
color: var(--muted);
|
| 195 |
+
text-transform: uppercase;
|
| 196 |
+
letter-spacing: 0.1em;
|
| 197 |
+
font-family: 'DM Mono', monospace;
|
| 198 |
+
margin-top: 20px;
|
| 199 |
+
margin-bottom: 9px;
|
| 200 |
+
}
|
| 201 |
+
.section-label:first-child { margin-top: 0; }
|
| 202 |
+
|
| 203 |
+
/* Log */
|
| 204 |
+
.log-wrap { display: none; margin-top: 16px; }
|
| 205 |
+
.log-header {
|
| 206 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 207 |
+
margin-bottom: 6px;
|
| 208 |
+
}
|
| 209 |
+
.log-header span {
|
| 210 |
+
font-size: 10px;
|
| 211 |
+
font-family: 'DM Mono', monospace;
|
| 212 |
+
color: var(--muted);
|
| 213 |
+
text-transform: uppercase;
|
| 214 |
+
letter-spacing: 0.08em;
|
| 215 |
+
}
|
| 216 |
+
#log-status { transition: color 0.3s; }
|
| 217 |
+
.log-box {
|
| 218 |
+
background: #080a0e;
|
| 219 |
+
border: 1px solid var(--border);
|
| 220 |
+
border-radius: 8px;
|
| 221 |
+
padding: 14px;
|
| 222 |
+
font-family: 'DM Mono', monospace;
|
| 223 |
+
font-size: 11px;
|
| 224 |
+
line-height: 1.75;
|
| 225 |
+
color: #7ee8a2;
|
| 226 |
+
height: 130px;
|
| 227 |
+
overflow-y: auto;
|
| 228 |
+
white-space: pre-wrap;
|
| 229 |
+
word-break: break-all;
|
| 230 |
+
}
|
| 231 |
+
.log-box::-webkit-scrollbar { width: 4px; }
|
| 232 |
+
.log-box::-webkit-scrollbar-thumb { background: #2a2f3a; border-radius: 2px; }
|
| 233 |
+
.log-error { color: var(--danger); }
|
| 234 |
+
|
| 235 |
+
/* Logout */
|
| 236 |
+
.logout-row {
|
| 237 |
+
margin-top: 20px;
|
| 238 |
+
padding-top: 18px;
|
| 239 |
+
border-top: 1px solid var(--border);
|
| 240 |
+
display: flex;
|
| 241 |
+
align-items: center;
|
| 242 |
+
justify-content: space-between;
|
| 243 |
+
}
|
| 244 |
+
.logout-row span { font-size: 12px; color: var(--muted); }
|
| 245 |
+
.logout-btn {
|
| 246 |
+
font-family: 'DM Mono', monospace;
|
| 247 |
+
font-size: 11px;
|
| 248 |
+
background: none;
|
| 249 |
+
border: 1px solid var(--border);
|
| 250 |
+
color: var(--muted);
|
| 251 |
+
padding: 6px 13px;
|
| 252 |
+
border-radius: 6px;
|
| 253 |
+
cursor: pointer;
|
| 254 |
+
transition: all 0.2s;
|
| 255 |
+
letter-spacing: 0.05em;
|
| 256 |
+
}
|
| 257 |
+
.logout-btn:hover { color: var(--danger); border-color: rgba(255,92,114,0.3); background: rgba(255,92,114,0.05); }
|
| 258 |
+
|
| 259 |
+
input[type="number"]::-webkit-inner-spin-button,
|
| 260 |
+
input[type="number"]::-webkit-outer-spin-button { opacity: 0.4; }
|
| 261 |
+
</style>
|
| 262 |
+
</head>
|
| 263 |
+
<body>
|
| 264 |
+
|
| 265 |
+
<div class="card">
|
| 266 |
+
|
| 267 |
+
<!-- Header -->
|
| 268 |
+
<div class="card-header">
|
| 269 |
+
<div class="logo-mark">
|
| 270 |
+
<svg viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
|
| 271 |
+
<path d="M3 5h14M3 10h9M3 15h5" stroke="#63d2ff" stroke-width="1.8" stroke-linecap="round"/>
|
| 272 |
+
<circle cx="16" cy="14" r="3" stroke="#63d2ff" stroke-width="1.5"/>
|
| 273 |
+
<path d="M18.5 16.5L20 18" stroke="#63d2ff" stroke-width="1.5" stroke-linecap="round"/>
|
| 274 |
+
</svg>
|
| 275 |
+
</div>
|
| 276 |
+
<div class="header-text">
|
| 277 |
+
<h1>RAG Admin Console</h1>
|
| 278 |
+
<p>Knowledge base management</p>
|
| 279 |
+
</div>
|
| 280 |
+
</div>
|
| 281 |
+
|
| 282 |
+
<!-- Status -->
|
| 283 |
+
<div class="status-row">
|
| 284 |
+
<span class="status-label">System Status</span>
|
| 285 |
+
<span id="status-pill" class="pill loading">
|
| 286 |
+
<span class="pill-dot pulse"></span>
|
| 287 |
+
<span id="status-text">Checking...</span>
|
| 288 |
+
</span>
|
| 289 |
+
</div>
|
| 290 |
+
|
| 291 |
+
<!-- Body -->
|
| 292 |
+
<div class="card-body">
|
| 293 |
+
|
| 294 |
+
<!-- Login -->
|
| 295 |
+
<div id="login-section">
|
| 296 |
+
<div class="field">
|
| 297 |
+
<label>Username</label>
|
| 298 |
+
<input type="text" id="username" placeholder="admin@example.com" autocomplete="username">
|
| 299 |
+
</div>
|
| 300 |
+
<div class="field">
|
| 301 |
+
<label>Password</label>
|
| 302 |
+
<input type="password" id="password" placeholder="••••••••" autocomplete="current-password"
|
| 303 |
+
onkeydown="if(event.key==='Enter') login()">
|
| 304 |
+
</div>
|
| 305 |
+
<button class="btn btn-primary" onclick="login()">
|
| 306 |
+
<svg width="14" height="14" viewBox="0 0 16 16" fill="none">
|
| 307 |
+
<path d="M6 2H3a1 1 0 00-1 1v10a1 1 0 001 1h3M10 5l3 3-3 3M13 8H6"
|
| 308 |
+
stroke="currentColor" stroke-width="1.6" stroke-linecap="round" stroke-linejoin="round"/>
|
| 309 |
+
</svg>
|
| 310 |
+
Sign In
|
| 311 |
+
</button>
|
| 312 |
+
</div>
|
| 313 |
+
|
| 314 |
+
<!-- Admin Panel -->
|
| 315 |
+
<div id="admin-section" style="display:none;">
|
| 316 |
+
|
| 317 |
+
<p class="section-label">URL Sources</p>
|
| 318 |
+
<button class="btn btn-ghost" onclick="performAction('/admin/fetch_rentry')">
|
| 319 |
+
<svg width="14" height="14" viewBox="0 0 16 16" fill="none">
|
| 320 |
+
<path d="M13 8A5 5 0 112.5 6.5" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
|
| 321 |
+
<path d="M2 3.5L2.5 6.5 5.5 6" stroke="currentColor" stroke-width="1.6" stroke-linecap="round" stroke-linejoin="round"/>
|
| 322 |
+
</svg>
|
| 323 |
+
Fetch & Update from URL
|
| 324 |
+
</button>
|
| 325 |
+
|
| 326 |
+
<p class="section-label" style="margin-top:20px;">Local Index</p>
|
| 327 |
+
<div class="field">
|
| 328 |
+
<label>Max files (incremental)</label>
|
| 329 |
+
<input type="number" id="max-files" value="50" min="1" max="500">
|
| 330 |
+
</div>
|
| 331 |
+
<button class="btn btn-ghost" onclick="performAction('/admin/update_faiss_index')">
|
| 332 |
+
<svg width="14" height="14" viewBox="0 0 16 16" fill="none">
|
| 333 |
+
<path d="M8 2v6m0 0l-2.5-2.5M8 8l2.5-2.5" stroke="currentColor" stroke-width="1.6" stroke-linecap="round" stroke-linejoin="round"/>
|
| 334 |
+
<path d="M2 11v1a2 2 0 002 2h8a2 2 0 002-2v-1" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
|
| 335 |
+
</svg>
|
| 336 |
+
Update Index — New Files Only
|
| 337 |
+
</button>
|
| 338 |
+
<button class="btn btn-danger-soft" onclick="confirmRebuild()">
|
| 339 |
+
<svg width="14" height="14" viewBox="0 0 16 16" fill="none">
|
| 340 |
+
<path d="M3 8a5 5 0 0110 0" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
|
| 341 |
+
<path d="M13 5.5V8h-2.5M3 10.5V8h2.5" stroke="currentColor" stroke-width="1.6" stroke-linecap="round" stroke-linejoin="round"/>
|
| 342 |
+
</svg>
|
| 343 |
+
Rebuild Full Index
|
| 344 |
+
</button>
|
| 345 |
+
|
| 346 |
+
<!-- Log -->
|
| 347 |
+
<div class="log-wrap" id="log-wrap">
|
| 348 |
+
<div class="log-header">
|
| 349 |
+
<span>Operation Log</span>
|
| 350 |
+
<span id="log-status" style="color:var(--warn)">running…</span>
|
| 351 |
+
</div>
|
| 352 |
+
<div class="log-box" id="log-box"></div>
|
| 353 |
+
</div>
|
| 354 |
+
|
| 355 |
+
<!-- Logout -->
|
| 356 |
+
<div class="logout-row">
|
| 357 |
+
<span id="signed-in-label">Signed in as admin</span>
|
| 358 |
+
<button class="logout-btn" onclick="logout()">Sign out</button>
|
| 359 |
+
</div>
|
| 360 |
+
</div>
|
| 361 |
+
|
| 362 |
+
</div>
|
| 363 |
+
</div>
|
| 364 |
+
|
| 365 |
+
<script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
|
| 366 |
+
<script>
|
| 367 |
+
// Basic-auth credentials ({ username, password }) of the signed-in admin;
// null while signed out. Set by login(), cleared by logout().
let authHeader = null;
|
| 368 |
+
|
| 369 |
+
// On page load: refresh the status pill, then attempt a silent sign-in with
// any credentials previously persisted in localStorage.
window.onload = async () => {
  await checkStatus();

  const storedUser = localStorage.getItem('rag_admin_user');
  const storedPass = localStorage.getItem('rag_admin_pass');
  if (!storedUser || !storedPass) {
    return;
  }

  // login() reads its credentials from the form fields, so populate them first.
  document.getElementById('username').value = storedUser;
  document.getElementById('password').value = storedPass;
  await login(true);
};
|
| 379 |
+
|
| 380 |
+
// Queries GET /status and renders the result in the header status pill:
//   - request succeeded and rag_initialized is truthy -> "Online"
//   - request succeeded otherwise                      -> "Not Initialized" (pulsing dot)
//   - request (or reading its payload) failed          -> "Offline"
async function checkStatus() {
  const pill = document.getElementById('status-pill');
  const text = document.getElementById('status-text');
  const dot = pill.querySelector('.pill-dot');

  // Presentation for each state: pill class list, whether the dot pulses, label.
  const views = {
    online: { pillClass: 'pill', pulse: false, label: 'Online' },
    pending: { pillClass: 'pill loading', pulse: true, label: 'Not Initialized' },
    offline: { pillClass: 'pill error', pulse: false, label: 'Offline' },
  };

  let state;
  try {
    const { data } = await axios.get('/status');
    state = data.rag_initialized ? 'online' : 'pending';
  } catch {
    state = 'offline';
  }

  const view = views[state];
  pill.className = view.pillClass;
  dot.classList.toggle('pulse', view.pulse);
  text.textContent = view.label;
}
|
| 401 |
+
|
| 402 |
+
// Verifies the credentials typed into the login form by POSTing to
// /admin/login with HTTP basic auth. On success the credentials are kept in
// `authHeader`, persisted to localStorage, and the admin panel is shown.
// On any failure the session is reset via logout().
//
//   isSilent - when true (automatic sign-in on page load) a failure does not
//              flash the password field.
//
// NOTE(review): the password is written to localStorage in plain text so the
// page can auto-login later; consider whether that is acceptable here.
async function login(isSilent = false) {
  const userField = document.getElementById('username');
  const passField = document.getElementById('password');
  const u = userField.value.trim();
  const p = passField.value;

  try {
    await axios.post('/admin/login', {}, { auth: { username: u, password: p } });

    authHeader = { username: u, password: p };
    localStorage.setItem('rag_admin_user', u);
    localStorage.setItem('rag_admin_pass', p);

    // Swap the login form for the admin panel.
    document.getElementById('login-section').style.display = 'none';
    document.getElementById('admin-section').style.display = 'block';
    document.getElementById('signed-in-label').textContent = `Signed in as ${u}`;

    // Do not leave the password sitting in the (now hidden) input.
    passField.value = '';
  } catch {
    if (!isSilent) {
      flashError();
    }
    logout();
  }
}
|
| 419 |
+
|
| 420 |
+
// Ends the admin session: forgets the in-memory and persisted credentials,
// clears the form inputs, and returns the UI to the login view.
function logout() {
  authHeader = null;

  for (const key of ['rag_admin_user', 'rag_admin_pass']) {
    localStorage.removeItem(key);
  }

  for (const inputId of ['username', 'password']) {
    document.getElementById(inputId).value = '';
  }

  // Element id -> CSS display value for the signed-out state.
  const signedOutLayout = [
    ['login-section', 'block'],
    ['admin-section', 'none'],
    ['log-wrap', 'none'],
  ];
  for (const [elementId, display] of signedOutLayout) {
    document.getElementById(elementId).style.display = display;
  }
}
|
| 430 |
+
|
| 431 |
+
// Highlights the password input with the danger colour for 2.5 seconds to
// signal a failed sign-in, then restores the stylesheet defaults.
function flashError() {
  const { style } = document.getElementById('password');

  style.borderColor = 'var(--danger)';
  style.boxShadow = '0 0 0 3px rgba(255,92,114,0.1)';

  setTimeout(() => {
    style.borderColor = '';
    style.boxShadow = '';
  }, 2500);
}
|
| 437 |
+
|
| 438 |
+
// Asks the user to confirm before starting a full index rebuild; does nothing
// if the dialog is dismissed.
function confirmRebuild() {
  const confirmed = confirm('Rebuild the full index? This will re-process all documents and may take several minutes.');
  if (!confirmed) {
    return;
  }
  performAction('/admin/rebuild_index');
}
|
| 443 |
+
|
| 444 |
+
// Runs an admin operation by POSTing to `url` with the stored basic-auth
// credentials and streams the outcome into the "Operation Log" panel.
//
//   url - admin endpoint to call. When it contains "update", the value of the
//         "Max files" input is sent as `max_new_files` (falling back to 50 if
//         the field does not parse as an integer).
//
// On success the JSON response is appended to the log and the status pill is
// refreshed. On failure the error is appended to the log; a 401 response
// additionally signs the user out.
async function performAction(url) {
  const wrap = document.getElementById('log-wrap');
  const logBox = document.getElementById('log-box');
  const logSt = document.getElementById('log-status');

  wrap.style.display = 'block';
  logBox.textContent = 'Processing — this may take a minute…\n';
  logSt.textContent = 'running…';
  logSt.style.color = 'var(--warn)';

  const payload = {};
  if (url.includes('update')) {
    payload.max_new_files = parseInt(document.getElementById('max-files').value, 10) || 50;
  }

  try {
    const res = await axios.post(url, payload, { auth: authHeader });
    logBox.textContent += '\n[SUCCESS]\n' + JSON.stringify(res.data, null, 2);
    logSt.textContent = 'done';
    logSt.style.color = 'var(--success)';
    checkStatus();
  } catch (e) {
    const msg = e.response ? JSON.stringify(e.response.data, null, 2) : e.message;

    // Build the error entry as DOM nodes with textContent rather than
    // concatenating into innerHTML: the message comes from the server (or the
    // network stack) and must never be interpreted as markup.
    const errorEntry = document.createElement('span');
    errorEntry.className = 'log-error';
    errorEntry.textContent = `[ERROR]\n${msg}`;
    logBox.append('\n', errorEntry);

    logSt.textContent = 'failed';
    logSt.style.color = 'var(--danger)';
    if (e.response?.status === 401) {
      alert('Session expired. Please sign in again.');
      logout();
    }
  } finally {
    // Keep the newest log output in view.
    logBox.scrollTop = logBox.scrollHeight;
  }
}
|
| 478 |
+
</script>
|
| 479 |
+
</body>
|
| 480 |
+
</html>
|
utils.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
import re
|
| 4 |
+
import shutil
|
| 5 |
+
import tempfile
|
| 6 |
+
import zipfile
|
| 7 |
+
import requests
|
| 8 |
+
from typing import Optional
|
| 9 |
+
from bs4 import BeautifulSoup
|
| 10 |
+
|
| 11 |
+
import gdown
|
| 12 |
+
from pypdf import PdfReader
|
| 13 |
+
import docx as python_docx
|
| 14 |
+
|
| 15 |
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
def extract_text_from_file(file_path: str, file_type: str) -> Optional[str]:
    """Extract plain text from a PDF, DOCX, or TXT file.

    Args:
        file_path: Path of the file to read.
        file_type: ``'pdf'``, ``'docx'`` or ``'txt'``. Any other value is
            logged as unsupported.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``None``
        when the type is unsupported, no text was found, or extraction raised
        (the exception is logged, not propagated).
    """
    logger.info(f"[TEXT_EXTRACTION] Starting extraction from {file_type.upper()} file: {file_path}")
    text_content = None
    try:
        if file_type == 'pdf':
            reader = PdfReader(file_path)
            # Extract each page exactly once; pages yielding no text are skipped.
            page_texts = (page.extract_text() for page in reader.pages)
            text_content = "".join(f"{page_text}\n" for page_text in page_texts if page_text)
        elif file_type == 'docx':
            doc = python_docx.Document(file_path)
            text_content = "\n".join(para.text for para in doc.paragraphs if para.text)
        elif file_type == 'txt':
            # Undecodable bytes are dropped rather than failing the whole file.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text_content = f.read()
        else:
            logger.warning(f"[TEXT_EXTRACTION] Unsupported file type: {file_type}")
            return None

        if not text_content or not text_content.strip():
            logger.warning(f"[TEXT_EXTRACTION] No text content extracted from {file_path}")
            return None

        return text_content.strip()
    except Exception as e:
        logger.error(f"[TEXT_EXTRACTION] Error extracting text: {e}", exc_info=True)
        return None
|
| 42 |
+
|
| 43 |
+
def _text_extractor_for(file_type):
    """Return a one-argument callable that extracts text from a *file_type* file."""
    return lambda path: extract_text_from_file(path, file_type)


# Maps a supported file extension (without the dot) to a callable that takes a
# file path and returns its text.
FAISS_RAG_SUPPORTED_EXTENSIONS = {
    **{ext: _text_extractor_for(ext) for ext in ('pdf', 'docx', 'txt')},
    # Bypassed directly in components to allow row-by-row chunks
    'csv': lambda path: "CSV_HANDLED_NATIVELY",
}
|
| 49 |
+
|
| 50 |
+
def fetch_and_clean_url(url: str, output_txt_path: str) -> bool:
    """Fetch HTML from a URL, reduce it to readable text, and save it to a file.

    Args:
        url: Page to download.
        output_txt_path: Destination text file. Missing parent directories are
            created; a bare filename (no directory part) is also accepted.

    Returns:
        ``True`` when the text was written, ``False`` on any error (the
        exception is logged, not propagated).
    """
    try:
        logger.info(f"[URL_FETCH] Fetching and cleaning data from: {url}")
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove non-content tags completely
        for tag in soup(["script", "style", "nav", "footer", "header", "noscript"]):
            tag.decompose()

        # Attempt to find specific content divs if they exist (generalizes rentry and others)
        entry = soup.find('div', class_='entry-text')
        if entry:
            text = entry.get_text(separator='\n', strip=True)
        else:
            # Fallback for other sites or layout changes
            content_root = soup.body if soup.body else soup
            text = content_root.get_text(separator='\n', strip=True)

        # Clean up excessive blank lines
        text = re.sub(r'\n\s*\n', '\n\n', text)

        # Ensure directory exists. os.makedirs('') raises, so skip it when the
        # output path is a bare filename in the current directory.
        output_dir = os.path.dirname(output_txt_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.write(text)

        logger.info(f"[URL_FETCH] Success! Text saved to {output_txt_path}")
        return True
    except Exception as e:
        logger.error(f"[URL_FETCH] Error fetching/cleaning URL {url}: {e}", exc_info=True)
        return False
|
| 86 |
+
|
| 87 |
+
def get_id_from_gdrive_input(url_or_id: str) -> Optional[str]:
    """Extract a Google Drive file/folder ID from a share URL or a bare ID.

    Recognised URL shapes, checked in this order: ``.../folders/<id>``,
    ``.../d/<id>`` and ``...id=<id>``. Anything else is treated as a bare ID
    and accepted only if it is longer than 10 characters and consists solely
    of ID characters (letters, digits, ``_``, ``-``).

    Args:
        url_or_id: Share URL or raw ID; surrounding whitespace is ignored.

    Returns:
        The ID, or ``None`` when the input is empty or no ID can be derived.
    """
    if not url_or_id:
        return None

    candidate = url_or_id.strip()

    url_patterns = (
        r"/folders/([a-zA-Z0-9_-]+)",
        r"/d/([a-zA-Z0-9_-]+)",
        r"id=([a-zA-Z0-9_-]+)",
    )
    for pattern in url_patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)

    # Bare-ID fallback. Requiring the ID alphabet keeps unrelated URLs or free
    # text from being passed on to the downloader as if they were IDs.
    if len(candidate) > 10 and re.fullmatch(r"[a-zA-Z0-9_-]+", candidate):
        return candidate
    return None
|
| 96 |
+
|
| 97 |
+
def download_gdrive_file(file_id_or_url: str, target_path: str) -> bool:
    """Download a single Google Drive file to ``target_path``.

    Args:
        file_id_or_url: Drive share URL or raw file ID.
        target_path: Destination file path. Missing parent directories are
            created; a bare filename (no directory part) is also accepted.

    Returns:
        ``True`` when a non-empty file exists at ``target_path`` after the
        download, ``False`` otherwise (including on any logged exception).
    """
    logger.info(f"[GDRIVE_SINGLE] Downloading file. Input: {file_id_or_url}")
    file_id = get_id_from_gdrive_input(file_id_or_url)
    if not file_id:
        return False

    try:
        # os.makedirs('') raises, so only create the parent when there is one.
        target_dir = os.path.dirname(target_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        gdown.download(id=file_id, output=target_path, quiet=False, fuzzy=True)

        # Judge success by the file on disk rather than by gdown's return value.
        return os.path.exists(target_path) and os.path.getsize(target_path) > 0
    except Exception as e:
        logger.error(f"[GDRIVE_SINGLE] Error: {e}", exc_info=True)
        return False
|
| 112 |
+
|
| 113 |
+
def download_and_unzip_gdrive_folder(folder_id_or_url: str, target_dir_for_contents: str) -> bool:
    """Download a Google Drive folder and move its contents into a directory.

    The folder is fetched into a temporary staging directory that is always
    removed afterwards. If the download consists of exactly one directory,
    that directory's children are moved instead of the directory itself.
    No archive extraction happens in this function.

    Args:
        folder_id_or_url: Drive folder share URL or raw folder ID.
        target_dir_for_contents: Directory that receives the downloaded items;
            created when it does not exist.

    Returns:
        ``True`` when the download and move completed, ``False`` when no ID
        could be derived or any step raised (the exception is logged).
    """
    logger.info(f"[GDRIVE] Downloading folder. Input: {folder_id_or_url}")
    folder_id = get_id_from_gdrive_input(folder_id_or_url)
    if not folder_id:
        return False

    staging_dir = tempfile.mkdtemp()
    try:
        gdown.download_folder(id=folder_id, output=staging_dir, quiet=False, use_cookies=False)
        if not os.path.exists(target_dir_for_contents):
            os.makedirs(target_dir_for_contents)

        # Unwrap a lone top-level directory so its children land directly in
        # the target directory.
        source_root = staging_dir
        downloaded = os.listdir(staging_dir)
        if len(downloaded) == 1:
            sole_entry = os.path.join(staging_dir, downloaded[0])
            if os.path.isdir(sole_entry):
                source_root = sole_entry

        for name in os.listdir(source_root):
            origin = os.path.join(source_root, name)
            destination = os.path.join(target_dir_for_contents, name)
            shutil.move(origin, destination)
        return True
    except Exception as e:
        logger.error(f"[GDRIVE] Error: {e}", exc_info=True)
        return False
    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
|
| 136 |
+
|
| 137 |
+
def download_and_unzip_gdrive_file(file_id_or_url: str, target_extraction_dir: str) -> bool:
    """Download a ZIP archive from Google Drive and extract it.

    The archive is downloaded into a private temporary directory, so
    concurrent calls cannot overwrite each other's download, and that
    directory is removed whether or not extraction succeeds.

    Args:
        file_id_or_url: Drive share URL or raw file ID of the ZIP archive.
        target_extraction_dir: Directory the archive is extracted into.

    Returns:
        ``True`` when the archive was downloaded and extracted, ``False`` when
        no ID could be derived or any step raised (the exception is logged).
    """
    logger.info(f"[GDRIVE_ZIP] Downloading ZIP. Input: {file_id_or_url}")
    file_id = get_id_from_gdrive_input(file_id_or_url)
    if not file_id:
        return False

    temp_dir = tempfile.mkdtemp()
    temp_zip = os.path.join(temp_dir, "temp_download.zip")
    try:
        gdown.download(id=file_id, output=temp_zip, quiet=False)
        with zipfile.ZipFile(temp_zip, 'r') as zip_ref:
            zip_ref.extractall(target_extraction_dir)
        return True
    except Exception as e:
        logger.error(f"[GDRIVE_ZIP] Error: {e}", exc_info=True)
        return False
    finally:
        # Always discard the downloaded archive.
        shutil.rmtree(temp_dir, ignore_errors=True)
|