Spaces:
Sleeping
Sleeping
korupolujayanth2004 committed on
Commit ·
5782137
1
Parent(s): 60f33ef
Update qdrant_client.py
Browse files- backend/qdrant_client.py +40 -43
backend/qdrant_client.py
CHANGED
|
@@ -10,96 +10,92 @@ load_dotenv()
|
|
| 10 |
# === Environment Variables ===
|
| 11 |
QDRANT_HOST = os.getenv(
|
| 12 |
"QDRANT_HOST",
|
| 13 |
-
"
|
| 14 |
)
|
| 15 |
-
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
| 16 |
|
| 17 |
# === Collection Names ===
|
| 18 |
-
KB_COLLECTION = "rag_collection"
|
| 19 |
-
CHAT_HISTORY_COLLECTION = "chat_history_collection"
|
| 20 |
|
| 21 |
# === Qdrant Client Initialization ===
|
| 22 |
qdrant_client = QdrantClient(
|
| 23 |
url=QDRANT_HOST,
|
| 24 |
api_key=QDRANT_API_KEY,
|
| 25 |
-
prefer_grpc=False,
|
| 26 |
-
timeout=30,
|
| 27 |
-
|
| 28 |
)
|
| 29 |
|
| 30 |
-
print("✅ Connected to Qdrant
|
| 31 |
|
| 32 |
# === Collection Creation and Management ===
|
| 33 |
def ensure_collection_exists(name: str, vector_size: int = 384):
|
| 34 |
"""
|
| 35 |
-
|
|
|
|
| 36 |
"""
|
| 37 |
try:
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
if info.status == CollectionStatus.GREEN:
|
| 41 |
-
print(f"✅ Collection exists and is ready: {name}")
|
| 42 |
-
return
|
| 43 |
except Exception:
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# === Payload Indexing Helper ===
|
| 56 |
def create_index_if_needed(collection: str, field_name: str, schema_type: str):
|
| 57 |
"""
|
| 58 |
-
Creates a payload index on a
|
| 59 |
-
Payload indexes speed up filtering operations (e.g., by session_id).
|
| 60 |
"""
|
| 61 |
try:
|
| 62 |
schema_enum = getattr(PayloadSchemaType, schema_type.upper())
|
| 63 |
qdrant_client.create_payload_index(
|
| 64 |
collection_name=collection,
|
| 65 |
field_name=field_name,
|
| 66 |
-
field_schema=schema_enum
|
| 67 |
)
|
| 68 |
print(f"🔧 Indexed '{field_name}' as {schema_type} in '{collection}'")
|
| 69 |
except Exception as e:
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
# print(f"ℹ️ Index for '{field_name}' in '{collection}' already exists.")
|
| 73 |
-
pass # Suppress common "already exists" error
|
| 74 |
else:
|
| 75 |
-
print(f"⚠️
|
| 76 |
-
|
| 77 |
|
| 78 |
# === Data Cleanup Utility (for development/testing) ===
|
| 79 |
def clean_collections():
|
| 80 |
"""
|
| 81 |
-
Deletes
|
| 82 |
-
|
| 83 |
"""
|
| 84 |
print("🧹 Cleaning old data from all collections...")
|
| 85 |
-
|
| 86 |
# Selector to delete all points in a collection (empty Filter() means no specific filter)
|
| 87 |
all_points_selector = FilterSelector(filter=Filter())
|
| 88 |
-
|
| 89 |
try:
|
| 90 |
qdrant_client.delete(
|
| 91 |
collection_name=KB_COLLECTION,
|
| 92 |
points_selector=all_points_selector
|
| 93 |
)
|
| 94 |
print(f"🗑️ All data cleaned from '{KB_COLLECTION}'.")
|
| 95 |
-
|
| 96 |
qdrant_client.delete(
|
| 97 |
collection_name=CHAT_HISTORY_COLLECTION,
|
| 98 |
points_selector=all_points_selector
|
| 99 |
)
|
| 100 |
print(f"🗑️ All data cleaned from '{CHAT_HISTORY_COLLECTION}'.")
|
| 101 |
print("🗑️ All old data cleaned from collections successfully.")
|
| 102 |
-
|
| 103 |
except Exception as e:
|
| 104 |
print(f"❌ Error during collection cleanup: {e}")
|
| 105 |
|
|
@@ -111,15 +107,16 @@ ensure_collection_exists(CHAT_HISTORY_COLLECTION)
|
|
| 111 |
# Create indexes for filtering and ordering chat history
|
| 112 |
create_index_if_needed(CHAT_HISTORY_COLLECTION, "session_id", "keyword")
|
| 113 |
create_index_if_needed(CHAT_HISTORY_COLLECTION, "turn_number", "integer")
|
| 114 |
-
create_index_if_needed(CHAT_HISTORY_COLLECTION, "timestamp", "keyword")
|
| 115 |
|
| 116 |
# Create indexes for filtering knowledge base documents
|
| 117 |
create_index_if_needed(KB_COLLECTION, "session_id", "keyword")
|
| 118 |
create_index_if_needed(KB_COLLECTION, "upload_timestamp", "keyword")
|
| 119 |
create_index_if_needed(KB_COLLECTION, "file_type", "keyword")
|
| 120 |
-
create_index_if_needed(KB_COLLECTION, "source", "keyword")
|
| 121 |
|
| 122 |
-
# === IMPORTANT:
|
| 123 |
# This line will wipe all your data from Qdrant EVERY TIME the backend starts.
|
| 124 |
# It's useful for initial setup and debugging, but comment it out for persistence.
|
| 125 |
-
|
|
|
|
|
|
| 10 |
# === Environment Variables ===
|
| 11 |
QDRANT_HOST = os.getenv(
|
| 12 |
"QDRANT_HOST",
|
| 13 |
+
"https://9485db48-8672-469a-a917-41a4ebbfd533.us-east4-0.gcp.cloud.qdrant.io" # Your cloud URL
|
| 14 |
)
|
| 15 |
+
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") # Only needed for cloud Qdrant
|
| 16 |
|
| 17 |
# === Collection Names ===
|
| 18 |
+
KB_COLLECTION = "rag_collection" # For document embeddings
|
| 19 |
+
CHAT_HISTORY_COLLECTION = "chat_history_collection" # For chat messages
|
| 20 |
|
| 21 |
# === Qdrant Client Initialization ===
|
| 22 |
qdrant_client = QdrantClient(
|
| 23 |
url=QDRANT_HOST,
|
| 24 |
api_key=QDRANT_API_KEY,
|
| 25 |
+
prefer_grpc=False, # Set to True for gRPC, False for REST
|
| 26 |
+
timeout=30, # Increased timeout for potentially slow operations
|
| 27 |
+
check_compatibility=False, # Skip version check to avoid warnings
|
| 28 |
)
|
| 29 |
|
| 30 |
+
print("✅ Connected to Qdrant Cloud")
|
| 31 |
|
| 32 |
# === Collection Creation and Management ===
|
| 33 |
def ensure_collection_exists(name: str, vector_size: int = 384):
|
| 34 |
"""
|
| 35 |
+
Guarantees that a collection exists; creates it only if it is missing.
|
| 36 |
+
Uses create_collection (non-destructive) so no delete permission is needed.
|
| 37 |
"""
|
| 38 |
try:
|
| 39 |
+
qdrant_client.get_collection(collection_name=name)
|
| 40 |
+
print(f"✅ Collection '{name}' already exists.")
|
|
|
|
|
|
|
|
|
|
| 41 |
except Exception:
|
| 42 |
+
print(f"π Creating collection: {name}")
|
| 43 |
+
try:
|
| 44 |
+
qdrant_client.create_collection(
|
| 45 |
+
collection_name=name,
|
| 46 |
+
vectors_config=VectorParams(
|
| 47 |
+
size=vector_size,
|
| 48 |
+
distance=Distance.COSINE,
|
| 49 |
+
),
|
| 50 |
+
)
|
| 51 |
+
print(f"π Collection '{name}' created successfully!")
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"❌ Failed to create collection '{name}': {e}")
|
| 54 |
|
| 55 |
# === Payload Indexing Helper ===
|
| 56 |
def create_index_if_needed(collection: str, field_name: str, schema_type: str):
|
| 57 |
"""
|
| 58 |
+
Creates a payload index on a field if it is not present already.
|
|
|
|
| 59 |
"""
|
| 60 |
try:
|
| 61 |
schema_enum = getattr(PayloadSchemaType, schema_type.upper())
|
| 62 |
qdrant_client.create_payload_index(
|
| 63 |
collection_name=collection,
|
| 64 |
field_name=field_name,
|
| 65 |
+
field_schema=schema_enum,
|
| 66 |
)
|
| 67 |
print(f"🔧 Indexed '{field_name}' as {schema_type} in '{collection}'")
|
| 68 |
except Exception as e:
|
| 69 |
+
if "already exists" in str(e):
|
| 70 |
+
pass # Index is already there — ignore
|
|
|
|
|
|
|
| 71 |
else:
|
| 72 |
+
print(f"⚠️ Could not create index '{field_name}' on '{collection}': {e}")
|
|
|
|
| 73 |
|
| 74 |
# === Data Cleanup Utility (for development/testing) ===
|
| 75 |
def clean_collections():
|
| 76 |
"""
|
| 77 |
+
Deletes ALL points from both collections.
|
| 78 |
+
Call it manually; do NOT run automatically in production.
|
| 79 |
"""
|
| 80 |
print("🧹 Cleaning old data from all collections...")
|
| 81 |
+
|
| 82 |
# Selector to delete all points in a collection (empty Filter() means no specific filter)
|
| 83 |
all_points_selector = FilterSelector(filter=Filter())
|
| 84 |
+
|
| 85 |
try:
|
| 86 |
qdrant_client.delete(
|
| 87 |
collection_name=KB_COLLECTION,
|
| 88 |
points_selector=all_points_selector
|
| 89 |
)
|
| 90 |
print(f"🗑️ All data cleaned from '{KB_COLLECTION}'.")
|
| 91 |
+
|
| 92 |
qdrant_client.delete(
|
| 93 |
collection_name=CHAT_HISTORY_COLLECTION,
|
| 94 |
points_selector=all_points_selector
|
| 95 |
)
|
| 96 |
print(f"🗑️ All data cleaned from '{CHAT_HISTORY_COLLECTION}'.")
|
| 97 |
print("🗑️ All old data cleaned from collections successfully.")
|
| 98 |
+
|
| 99 |
except Exception as e:
|
| 100 |
print(f"❌ Error during collection cleanup: {e}")
|
| 101 |
|
|
|
|
| 107 |
# Create indexes for filtering and ordering chat history
|
| 108 |
create_index_if_needed(CHAT_HISTORY_COLLECTION, "session_id", "keyword")
|
| 109 |
create_index_if_needed(CHAT_HISTORY_COLLECTION, "turn_number", "integer")
|
| 110 |
+
create_index_if_needed(CHAT_HISTORY_COLLECTION, "timestamp", "keyword") # Useful for sorting/filtering by time
|
| 111 |
|
| 112 |
# Create indexes for filtering knowledge base documents
|
| 113 |
create_index_if_needed(KB_COLLECTION, "session_id", "keyword")
|
| 114 |
create_index_if_needed(KB_COLLECTION, "upload_timestamp", "keyword")
|
| 115 |
create_index_if_needed(KB_COLLECTION, "file_type", "keyword")
|
| 116 |
+
create_index_if_needed(KB_COLLECTION, "source", "keyword") # Index source if you use it for filtering
|
| 117 |
|
| 118 |
+
# === IMPORTANT: Data wipe is now commented out ===
|
| 119 |
# This line will wipe all your data from Qdrant EVERY TIME the backend starts.
|
| 120 |
# It's useful for initial setup and debugging, but comment it out for persistence.
|
| 121 |
+
# Uncomment ONLY when you need to clear data manually:
|
| 122 |
+
# clean_collections()
|