korupolujayanth2004 committed on
Commit
5782137
·
1 Parent(s): 60f33ef

Update qdrant_client.py

Browse files
Files changed (1) hide show
  1. backend/qdrant_client.py +40 -43
backend/qdrant_client.py CHANGED
@@ -10,96 +10,92 @@ load_dotenv()
10
  # === Environment Variables ===
11
  QDRANT_HOST = os.getenv(
12
  "QDRANT_HOST",
13
- "http://localhost:6333" # Default to local if not set, or your cloud URL
14
  )
15
- QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") # Only needed for cloud Qdrant
16
 
17
  # === Collection Names ===
18
- KB_COLLECTION = "rag_collection" # For document embeddings
19
- CHAT_HISTORY_COLLECTION = "chat_history_collection" # For chat messages
20
 
21
  # === Qdrant Client Initialization ===
22
  qdrant_client = QdrantClient(
23
  url=QDRANT_HOST,
24
  api_key=QDRANT_API_KEY,
25
- prefer_grpc=False, # Set to True for gRPC, False for REST
26
- timeout=30, # Increased timeout for potentially slow operations
27
- # check_compatibility=False, # Uncomment if you face compatibility issues with Qdrant versions
28
  )
29
 
30
- print("βœ… Connected to Qdrant via REST")
31
 
32
  # === Collection Creation and Management ===
33
  def ensure_collection_exists(name: str, vector_size: int = 384):
34
  """
35
- Ensures a Qdrant collection exists. If not, it creates it.
 
36
  """
37
  try:
38
- # Check if collection exists
39
- info = qdrant_client.get_collection(collection_name=name)
40
- if info.status == CollectionStatus.GREEN:
41
- print(f"βœ… Collection exists and is ready: {name}")
42
- return
43
  except Exception:
44
- # Collection does not exist, so proceed to create
45
- pass
46
-
47
- print(f"πŸ†• Creating collection: {name}")
48
- qdrant_client.recreate_collection(
49
- collection_name=name,
50
- vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE) # Cosine similarity for embeddings
51
- )
52
- print(f"πŸŽ‰ Collection '{name}' created successfully!")
53
-
 
 
54
 
55
  # === Payload Indexing Helper ===
56
  def create_index_if_needed(collection: str, field_name: str, schema_type: str):
57
  """
58
- Creates a payload index on a specified field within a collection if it doesn't already exist.
59
- Payload indexes speed up filtering operations (e.g., by session_id).
60
  """
61
  try:
62
  schema_enum = getattr(PayloadSchemaType, schema_type.upper())
63
  qdrant_client.create_payload_index(
64
  collection_name=collection,
65
  field_name=field_name,
66
- field_schema=schema_enum
67
  )
68
  print(f"πŸ”§ Indexed '{field_name}' as {schema_type} in '{collection}'")
69
  except Exception as e:
70
- # This error often means the index already exists, which is fine.
71
- if "already exists" in str(e): # More specific check
72
- # print(f"ℹ️ Index for '{field_name}' in '{collection}' already exists.")
73
- pass # Suppress common "already exists" error
74
  else:
75
- print(f"⚠️ Failed to create index for '{field_name}' in '{collection}': {e}")
76
-
77
 
78
  # === Data Cleanup Utility (for development/testing) ===
79
  def clean_collections():
80
  """
81
- Deletes all data from both the RAG knowledge base and chat history collections.
82
- USE WITH CAUTION: This will wipe your data!
83
  """
84
  print("🧹 Cleaning old data from all collections...")
85
-
86
  # Selector to delete all points in a collection (empty Filter() means no specific filter)
87
  all_points_selector = FilterSelector(filter=Filter())
88
-
89
  try:
90
  qdrant_client.delete(
91
  collection_name=KB_COLLECTION,
92
  points_selector=all_points_selector
93
  )
94
  print(f"πŸ—‘οΈ All data cleaned from '{KB_COLLECTION}'.")
95
-
96
  qdrant_client.delete(
97
  collection_name=CHAT_HISTORY_COLLECTION,
98
  points_selector=all_points_selector
99
  )
100
  print(f"πŸ—‘οΈ All data cleaned from '{CHAT_HISTORY_COLLECTION}'.")
101
  print("πŸ—‘οΈ All old data cleaned from collections successfully.")
102
-
103
  except Exception as e:
104
  print(f"❌ Error during collection cleanup: {e}")
105
 
@@ -111,15 +107,16 @@ ensure_collection_exists(CHAT_HISTORY_COLLECTION)
111
  # Create indexes for filtering and ordering chat history
112
  create_index_if_needed(CHAT_HISTORY_COLLECTION, "session_id", "keyword")
113
  create_index_if_needed(CHAT_HISTORY_COLLECTION, "turn_number", "integer")
114
- create_index_if_needed(CHAT_HISTORY_COLLECTION, "timestamp", "keyword") # Useful for sorting/filtering by time
115
 
116
  # Create indexes for filtering knowledge base documents
117
  create_index_if_needed(KB_COLLECTION, "session_id", "keyword")
118
  create_index_if_needed(KB_COLLECTION, "upload_timestamp", "keyword")
119
  create_index_if_needed(KB_COLLECTION, "file_type", "keyword")
120
- create_index_if_needed(KB_COLLECTION, "source", "keyword") # Index source if you use it for filtering
121
 
122
- # === IMPORTANT: Wipe previous data (OPTIONAL - COMMENT OUT AFTER FIRST RUN!) ===
123
  # This line will wipe all your data from Qdrant EVERY TIME the backend starts.
124
  # It's useful for initial setup and debugging, but comment it out for persistence.
125
- clean_collections()
 
 
10
  # === Environment Variables ===
11
  QDRANT_HOST = os.getenv(
12
  "QDRANT_HOST",
13
+ "https://9485db48-8672-469a-a917-41a4ebbfd533.us-east4-0.gcp.cloud.qdrant.io" # Your cloud URL
14
  )
15
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") # Only needed for cloud Qdrant
16
 
17
  # === Collection Names ===
18
+ KB_COLLECTION = "rag_collection" # For document embeddings
19
+ CHAT_HISTORY_COLLECTION = "chat_history_collection" # For chat messages
20
 
21
  # === Qdrant Client Initialization ===
22
  qdrant_client = QdrantClient(
23
  url=QDRANT_HOST,
24
  api_key=QDRANT_API_KEY,
25
+ prefer_grpc=False, # Set to True for gRPC, False for REST
26
+ timeout=30, # Increased timeout for potentially slow operations
27
+ check_compatibility=False, # Skip version check to avoid warnings
28
  )
29
 
30
+ print("βœ… Connected to Qdrant Cloud")
31
 
32
  # === Collection Creation and Management ===
33
  def ensure_collection_exists(name: str, vector_size: int = 384):
34
  """
35
+ Guarantees that a collection exists; creates it only if it is missing.
36
+ Uses create_collection (non-destructive) so no delete permission is needed.
37
  """
38
  try:
39
+ qdrant_client.get_collection(collection_name=name)
40
+ print(f"βœ… Collection '{name}' already exists.")
 
 
 
41
  except Exception:
42
+ print(f"πŸ†• Creating collection: {name}")
43
+ try:
44
+ qdrant_client.create_collection(
45
+ collection_name=name,
46
+ vectors_config=VectorParams(
47
+ size=vector_size,
48
+ distance=Distance.COSINE,
49
+ ),
50
+ )
51
+ print(f"πŸŽ‰ Collection '{name}' created successfully!")
52
+ except Exception as e:
53
+ print(f"❌ Failed to create collection '{name}': {e}")
54
 
55
  # === Payload Indexing Helper ===
56
  def create_index_if_needed(collection: str, field_name: str, schema_type: str):
57
  """
58
+ Creates a payload index on a field if it is not present already.
 
59
  """
60
  try:
61
  schema_enum = getattr(PayloadSchemaType, schema_type.upper())
62
  qdrant_client.create_payload_index(
63
  collection_name=collection,
64
  field_name=field_name,
65
+ field_schema=schema_enum,
66
  )
67
  print(f"πŸ”§ Indexed '{field_name}' as {schema_type} in '{collection}'")
68
  except Exception as e:
69
+ if "already exists" in str(e):
70
+ pass # Index is already there – ignore
 
 
71
  else:
72
+ print(f"⚠️ Could not create index '{field_name}' on '{collection}': {e}")
 
73
 
74
  # === Data Cleanup Utility (for development/testing) ===
75
  def clean_collections():
76
  """
77
+ Deletes ALL points from both collections.
78
+ Call it manually; do NOT run automatically in production.
79
  """
80
  print("🧹 Cleaning old data from all collections...")
81
+
82
  # Selector to delete all points in a collection (empty Filter() means no specific filter)
83
  all_points_selector = FilterSelector(filter=Filter())
84
+
85
  try:
86
  qdrant_client.delete(
87
  collection_name=KB_COLLECTION,
88
  points_selector=all_points_selector
89
  )
90
  print(f"πŸ—‘οΈ All data cleaned from '{KB_COLLECTION}'.")
91
+
92
  qdrant_client.delete(
93
  collection_name=CHAT_HISTORY_COLLECTION,
94
  points_selector=all_points_selector
95
  )
96
  print(f"πŸ—‘οΈ All data cleaned from '{CHAT_HISTORY_COLLECTION}'.")
97
  print("πŸ—‘οΈ All old data cleaned from collections successfully.")
98
+
99
  except Exception as e:
100
  print(f"❌ Error during collection cleanup: {e}")
101
 
 
107
  # Create indexes for filtering and ordering chat history
108
  create_index_if_needed(CHAT_HISTORY_COLLECTION, "session_id", "keyword")
109
  create_index_if_needed(CHAT_HISTORY_COLLECTION, "turn_number", "integer")
110
+ create_index_if_needed(CHAT_HISTORY_COLLECTION, "timestamp", "keyword") # Useful for sorting/filtering by time
111
 
112
  # Create indexes for filtering knowledge base documents
113
  create_index_if_needed(KB_COLLECTION, "session_id", "keyword")
114
  create_index_if_needed(KB_COLLECTION, "upload_timestamp", "keyword")
115
  create_index_if_needed(KB_COLLECTION, "file_type", "keyword")
116
+ create_index_if_needed(KB_COLLECTION, "source", "keyword") # Index source if you use it for filtering
117
 
118
+ # === IMPORTANT: Data wipe is now commented out ===
119
  # This line will wipe all your data from Qdrant EVERY TIME the backend starts.
120
  # It's useful for initial setup and debugging, but comment it out for persistence.
121
+ # Uncomment ONLY when you need to clear data manually:
122
+ # clean_collections()