# Spaces: Sleeping
# File size: 4,885 Bytes
# backend/qdrant_client.py
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PayloadSchemaType, FilterSelector, Filter, CollectionStatus
import os
from dotenv import load_dotenv
load_dotenv()
# === Environment Variables ===
# QDRANT_HOST falls back to the hard-coded cloud URL below when the variable is unset.
QDRANT_HOST = os.getenv(
    "QDRANT_HOST",
    "https://9485db48-8672-469a-a917-41a4ebbfd533.us-east4-0.gcp.cloud.qdrant.io",  # Your cloud URL
)
# os.getenv returns None when QDRANT_API_KEY is not set.
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")  # Only needed for cloud Qdrant

# === Collection Names ===
KB_COLLECTION = "rag_collection"  # For document embeddings
CHAT_HISTORY_COLLECTION = "chat_history_collection"  # For chat messages

# === Qdrant Client Initialization ===
# NOTE(review): this module-level name is the same as the imported
# `qdrant_client` package; the visible code only uses `from qdrant_client ...
# import`, so the package object itself is not referenced after this point.
qdrant_client = QdrantClient(
    url=QDRANT_HOST,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,  # Set to True for gRPC, False for REST
    timeout=30,  # Increased timeout for potentially slow operations
    check_compatibility=False,  # Skip version check to avoid warnings
)
# NOTE(review): printed right after construction; confirm whether the
# constructor has actually contacted the server at this point.
print("✅ Connected to Qdrant Cloud")
# === Collection Creation and Management ===
def ensure_collection_exists(name: str, vector_size: int = 384):
    """
    Guarantee that a collection exists; create it only if it is missing.

    Uses create_collection (non-destructive) so no delete permission is needed.
    Every failure is reported on stdout and swallowed, so this function never
    raises to its caller.

    Args:
        name: Name of the Qdrant collection to check or create.
        vector_size: Size of the vectors stored in a newly created collection.
            Ignored when the collection already exists.
    """
    # Ask the server explicitly instead of treating *any* exception from a
    # lookup as "collection missing": a network or auth failure must not be
    # mistaken for an absent collection.
    try:
        already_present = qdrant_client.collection_exists(collection_name=name)
    except Exception as e:
        print(f"❌ Could not check collection '{name}': {e}")
        return

    if already_present:
        print(f"✅ Collection '{name}' already exists.")
        return

    print(f"🆕 Creating collection: {name}")
    try:
        qdrant_client.create_collection(
            collection_name=name,
            vectors_config=VectorParams(
                size=vector_size,
                distance=Distance.COSINE,
            ),
        )
    except Exception as e:
        print(f"❌ Failed to create collection '{name}': {e}")
    else:
        print(f"🎉 Collection '{name}' created successfully!")
# === Payload Indexing Helper ===
def create_index_if_needed(collection: str, field_name: str, schema_type: str):
    """
    Request a payload index on a field, tolerating an index that already exists.

    The index is always requested; an error whose text contains
    "already exists" is ignored, and any other error is printed as a warning.
    Nothing is raised to the caller.

    Args:
        collection: Name of the collection that holds the field.
        field_name: Payload key to index.
        schema_type: Name of a PayloadSchemaType member, case-insensitive
            (the callers in this file pass "keyword" and "integer").
    """
    try:
        # Resolve the schema name inside the try block so that an unknown name
        # (AttributeError) is reported through the same warning path below.
        schema_enum = getattr(PayloadSchemaType, schema_type.upper())
        qdrant_client.create_payload_index(
            collection_name=collection,
            field_name=field_name,
            field_schema=schema_enum,
        )
        print(f"🔧 Indexed '{field_name}' as {schema_type} in '{collection}'")
    except Exception as e:
        # Index is already there — ignore; report everything else.
        if "already exists" not in str(e):
            print(f"⚠️ Could not create index '{field_name}' on '{collection}': {e}")
# === Data Cleanup Utility (for development/testing) ===
def clean_collections():
    """
    Delete ALL points from both collections.

    Call it manually; do NOT run automatically in production.

    Each collection is cleaned independently, so a failure on one does not
    prevent the other from being cleaned. Errors are printed, not raised, and
    the final success message appears only when every delete succeeded.
    """
    print("🧹 Cleaning old data from all collections...")
    # Selector to delete all points in a collection (empty Filter() means no specific filter)
    all_points_selector = FilterSelector(filter=Filter())

    all_cleaned = True
    for collection in (KB_COLLECTION, CHAT_HISTORY_COLLECTION):
        try:
            qdrant_client.delete(
                collection_name=collection,
                points_selector=all_points_selector,
            )
        except Exception as e:
            all_cleaned = False
            print(f"❌ Error during collection cleanup: {e}")
        else:
            print(f"🗑️ All data cleaned from '{collection}'.")

    if all_cleaned:
        print("🗑️ All old data cleaned from collections successfully.")
# === Initial Setup when this module is imported ===
# These calls run at import time: first make sure both collections are
# present, then request the payload indexes used for filtering.
ensure_collection_exists(name=KB_COLLECTION)
ensure_collection_exists(name=CHAT_HISTORY_COLLECTION)

# Chat history: indexes for filtering and ordering messages.
create_index_if_needed(
    collection=CHAT_HISTORY_COLLECTION, field_name="session_id", schema_type="keyword"
)
create_index_if_needed(
    collection=CHAT_HISTORY_COLLECTION, field_name="turn_number", schema_type="integer"
)
create_index_if_needed(  # Useful for sorting/filtering by time
    collection=CHAT_HISTORY_COLLECTION, field_name="timestamp", schema_type="keyword"
)

# Knowledge base: indexes for filtering documents.
create_index_if_needed(
    collection=KB_COLLECTION, field_name="session_id", schema_type="keyword"
)
create_index_if_needed(
    collection=KB_COLLECTION, field_name="upload_timestamp", schema_type="keyword"
)
create_index_if_needed(
    collection=KB_COLLECTION, field_name="file_type", schema_type="keyword"
)
create_index_if_needed(  # Index source if you use it for filtering
    collection=KB_COLLECTION, field_name="source", schema_type="keyword"
)

# === IMPORTANT: the data wipe below is intentionally disabled ===
# If enabled, it removes every point from both collections each time the
# backend starts. That is handy for first-time setup and debugging, but it
# must stay commented out whenever data is meant to persist.
# Uncomment ONLY when you need to clear data manually:
# clean_collections()
|