daniel-was-taken committed on
Commit
244f753
·
1 Parent(s): e23989a

Change in app.py

Browse files
Files changed (2) hide show
  1. app.py +17 -10
  2. populate_db.py +36 -26
app.py CHANGED
@@ -31,8 +31,15 @@ MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530")
31
  milvus_client = MilvusClient(uri=MILVUS_URI)
32
  collection_name = "my_rag_collection"
33
 
 
34
  if not milvus_client.has_collection(collection_name):
35
  main()
 
 
 
 
 
 
36
 
37
  embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
38
 
@@ -61,7 +68,6 @@ def retrieve_relevant_documents(query: str, limit: int = 5) -> List[Dict]:
61
  limit=limit,
62
  output_fields=["text", "metadata"]
63
  )
64
- # print("search_results:", search_results[0])
65
  documents = []
66
  for result in search_results[0]:
67
  doc_info = {
@@ -128,15 +134,16 @@ def setup_rag_chain():
128
  When answering questions, you should:
129
  1. Use the provided context documents to inform your response
130
  2. Be accurate and helpful
131
- 3. If the context doesn't contain relevant information, say so clearly
132
- 4. Always reply in English
133
- 5. Provide clear recommendations wherever applicable
134
- 6. Do not make assumptions about the user's knowledge or background
135
- 7. If the user asks for a specific law or regulation, provide a brief explanation and cite relevant documents if available.
136
- 8. Do not overlook the importance of accessibility and inclusivity in your responses.
137
- 9. Do not overemphasize disability in your responses, but rather focus on the support and adjustments that can be made to ensure equality and inclusivity.
138
- 10. If the user asks about a specific disability, provide general information and resources, but do not make assumptions about the individual's experience or needs.
139
- 11. If the user query explicitly asks for a disability-related topic, provide a well-informed response based on the context documents.
 
140
 
141
  Context documents:
142
  {context}
 
31
  milvus_client = MilvusClient(uri=MILVUS_URI)
32
  collection_name = "my_rag_collection"
33
 
34
+ # Initialize collection once at startup
35
  if not milvus_client.has_collection(collection_name):
36
  main()
37
+ else:
38
+ # Check if collection has data, populate if empty
39
+ stats = milvus_client.get_collection_stats(collection_name)
40
+ if stats['row_count'] == 0:
41
+ main()
42
+ milvus_client.load_collection(collection_name=collection_name)
43
 
44
  embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
45
 
 
68
  limit=limit,
69
  output_fields=["text", "metadata"]
70
  )
 
71
  documents = []
72
  for result in search_results[0]:
73
  doc_info = {
 
134
  When answering questions, you should:
135
  1. Use the provided context documents to inform your response
136
  2. Be accurate and helpful
137
+ 3. Cite relevant documents in the format [1], [2], etc.
138
+ 4. If the context doesn't contain relevant information, say so clearly
139
+ 5. Always reply in English
140
+ 6. Provide clear recommendations wherever applicable
141
+ 7. Do not make assumptions about the user's knowledge or background
142
+ 8. If the user asks for a specific law or regulation, provide a brief explanation and cite relevant documents if available.
143
+ 9. Do not overlook the importance of accessibility and inclusivity in your responses.
144
+ 10. Do not overemphasize disability in your responses, but rather focus on the support and adjustments that can be made to ensure equality and inclusivity.
145
+ 11. If the user asks about a specific disability, provide general information and resources, but do not make assumptions about the individual's experience or needs.
146
+ 12. If the user query explicitly asks for a disability-related topic, provide a well-informed response based on the context documents.
147
 
148
  Context documents:
149
  {context}
populate_db.py CHANGED
@@ -13,10 +13,6 @@ MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530")
13
  milvus_client = MilvusClient(uri=MILVUS_URI)
14
  collection_name = "my_rag_collection"
15
 
16
- # Drop existing collection if it exists
17
- # if milvus_client.has_collection(collection_name):
18
- # milvus_client.drop_collection(collection_name)
19
-
20
  # Initialize embedding model
21
  embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
22
 
@@ -24,35 +20,49 @@ def emb_text(text):
24
  """Generate embeddings for text using the sentence transformer model."""
25
  return embedding_model.encode([text], normalize_embeddings=True).tolist()[0]
26
 
27
- # Create Milvus collection schema
28
- schema = milvus_client.create_schema(auto_id=False, enable_dynamic_field=False)
29
- schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
30
- schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=384) # BGE-small-en-v1.5 dimension
31
- schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=32768) # 32KB max
32
- schema.add_field(field_name="metadata", datatype=DataType.JSON)
33
-
34
- # Create index for vector search
35
- index_params = MilvusClient.prepare_index_params()
36
- index_params.add_index(
37
- field_name="vector",
38
- metric_type="COSINE",
39
- index_type="AUTOINDEX",
40
- )
 
 
 
 
 
 
41
 
42
- # Create and load collection
43
- milvus_client.create_collection(
44
- collection_name=collection_name,
45
- schema=schema,
46
- index_params=index_params,
47
- consistency_level="Strong",
48
- )
49
- milvus_client.load_collection(collection_name=collection_name)
50
 
51
  # Document directory
52
  directory_path = "data/"
53
 
54
  def main():
55
  """Main function to load documents and insert them into Milvus."""
 
 
 
 
 
 
 
 
56
  docs = unstructured_document_loader()
57
 
58
  # Prepare data for insertion
 
13
  milvus_client = MilvusClient(uri=MILVUS_URI)
14
  collection_name = "my_rag_collection"
15
 
 
 
 
 
16
  # Initialize embedding model
17
  embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
18
 
 
20
  """Generate embeddings for text using the sentence transformer model."""
21
  return embedding_model.encode([text], normalize_embeddings=True).tolist()[0]
22
 
23
+ def create_collection():
24
+ """Create collection if it doesn't exist."""
25
+ if milvus_client.has_collection(collection_name):
26
+ milvus_client.load_collection(collection_name=collection_name)
27
+ return
28
+
29
+ # Create Milvus collection schema
30
+ schema = milvus_client.create_schema(auto_id=False, enable_dynamic_field=False)
31
+ schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
32
+ schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=384) # BGE-small-en-v1.5 dimension
33
+ schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=32768) # 32KB max
34
+ schema.add_field(field_name="metadata", datatype=DataType.JSON)
35
+
36
+ # Create index for vector search
37
+ index_params = MilvusClient.prepare_index_params()
38
+ index_params.add_index(
39
+ field_name="vector",
40
+ metric_type="COSINE",
41
+ index_type="AUTOINDEX",
42
+ )
43
 
44
+ # Create and load collection
45
+ milvus_client.create_collection(
46
+ collection_name=collection_name,
47
+ schema=schema,
48
+ index_params=index_params,
49
+ consistency_level="Strong",
50
+ )
51
+ milvus_client.load_collection(collection_name=collection_name)
52
 
53
  # Document directory
54
  directory_path = "data/"
55
 
56
  def main():
57
  """Main function to load documents and insert them into Milvus."""
58
+ create_collection()
59
+
60
+ # Check if collection already has data
61
+ stats = milvus_client.get_collection_stats(collection_name)
62
+ if stats['row_count'] > 0:
63
+ print(f"Collection already contains {stats['row_count']} documents. Skipping insertion.")
64
+ return
65
+
66
  docs = unstructured_document_loader()
67
 
68
  # Prepare data for insertion